MatplotLib

Different Types of Charts

Shotcuts: multiline comment/uncomment: 'ctrl' + '/'

Intro

In [3]:
import matplotlib.pyplot as plt

plt.plot([1,2,3,4,5],[4,5,6,7,8]) 
# both x,y should have same length
# print length to check both
plt.show() 

Label, Titles and Legend

In [4]:
x = [1,2,3,4,5]
y = [4,5,6,7,8]

plt.plot(x,y)
plt.xlabel('Plot x axix')
plt.ylabel('Plot y axis')
plt.title('Epic Graph') # Can use \n(new line) to fit the title

plt.show()
In [6]:
x = [1,2,3,4,5]
y1 = [4,5,6,7,8]
y2 = [5,3,2,1,0]

plt.plot(x,y1, label='Initial Line')
plt.plot(x,y2, label='Final Line')
plt.xlabel('Plot x axix')
plt.ylabel('Plot y axis')
plt.title('Epic Graph') # Can use \n(new line) to fit the title

plt.legend()

plt.show()

Bar Chart

In [10]:
x1 = [1,2,3,4,5]
x2 = [2,4,6,8,10]
y1 = [4,5,6,7,8]
y2 = [5,3,2,1,0]

plt.bar(x1,y1,label='one')
plt.bar(x2,y2,label='Second')

plt.xlabel('Plot x axix')
plt.ylabel('Plot y axis')
plt.title('Epic Graph') # Can use \n(new line) to fit the title

plt.legend()
plt.show()

Histogram

In [14]:
test_score = [55,45,60,78,98,75,43,67,89,95,96,99,78,87,93,72]
x = [x for x in range(len(test_score))]

# plt.bar(x,test_score)
# plt.show()

bins = [10,20,30,40,50,60,70,80,90,100]
plt.hist(test_score,bins,histtype='bar',rwidth=0.8)
plt.show()

bins = [10,20,30,40,50,60,70,80,90,100]
plt.hist(test_score,bins,histtype='bar',cumulative=True,rwidth=0.8)
plt.show()
In [79]:
import numpy as np
import matplotlib.pyplot as plt

# Fixing random state for reproducibility
np.random.seed(19680801)

mu, sigma = 100, 15
x = mu + sigma * np.random.randn(10000)

# the histogram of the data
n, bins, patches = plt.hist(x, 50, density=True, facecolor='g', alpha=0.75)


plt.xlabel('Smarts')
plt.ylabel('Probability')
plt.title('Histogram of IQ')
plt.text(60, .025, r'$\mu=100,\ \sigma=15$')# prints mu and sigma, adding text
plt.xlim(40, 160)
plt.ylim(0, 0.03)
plt.grid(True)
plt.show()

Scatter Plot

In [25]:
test_score_1 = [55,45,60,78,98,75,43,67,89,95,96,99,78,87,93,72]
test_score_2 = [19,11,23,45,67,89,90,34,56,67,65,54,34,23,56,78]
time_spent = [11,10,15,45,67,80,23,34,25,16,19,20,23,45,16,19]

plt.scatter(test_score_1,time_spent,marker='o',color='m')
plt.scatter(test_score_2,time_spent,marker='p',color='g')
plt.show()

Stack Plot

In [30]:
year=[1,2,3,4,5,6,7,8,9,10]
taxes=[17,18,40,43,44,8,43,32,39,30]
overhead=[30,22,9,29,17,12,14,24,49,35]
entertainment=[41,32,27,13,19,12,22,18,28,20]

plt.plot([],[],color='m',label='Taxes')
plt.plot([],[],color='c',label='Overhead')
plt.plot([],[],color='g',label='Entertainment')

plt.stackplot(year,taxes,overhead,entertainment,colors=['m','c','g'])
plt.legend()
plt.show()

Pie Chart

In [49]:
labels='Taxes','Overhead','Entertainment'
size=[25,32,12]
colors=['c','m','b']

plt.pie(size, labels=labels, colors=colors, startangle=0, autopct='%1.1f%%', explode=(0,0.1,0), shadow=True)
plt.axis('equal')

plt.show()

Basic Customization Options

In [53]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import urllib
import numpy as np

def graph_data(stock):
    print('Currently pulling:', stock)
    url = 'http://chartapi.finance.yahoo.com/instrument/1.0/'+stock+'/chartdata;type=quote;range=10y/csv'
    print(url)
    
stock = input('Stock to plot:')
graph_data(stock)
Stock to plot:tsla
Currently pulling: tsla
http://chartapi.finance.yahoo.com/instrument/1.0/tsla/chartdata;type=quote;range=10y/csv
In [55]:
import pandas as pd
df = pd.read_csv(r"C:\Users\saurabhkumar9\Desktop\Python Training\Forecasting\4. Udemy - Python for Time Series Data Analysis\Data\starbucks.csv",
                index_col='Date', parse_dates=True)
C:\Users\saurabhkumar9\AppData\Local\Continuum\anaconda2\envs\spyder\lib\importlib\_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject
  return f(*args, **kwds)
In [56]:
df.head()
Out[56]:
Close Volume
Date
2015-01-02 38.0061 6906098
2015-01-05 37.2781 11623796
2015-01-06 36.9748 7664340
2015-01-07 37.8848 9732554
2015-01-08 38.4961 13170548
In [83]:
title='Starbucks Closing Stock Prices'
ylabel='Closing Price (USD)'
xlabel='Closing Date'

ax = df['Close']['2017-01-01':'2017-03-01'].plot(figsize=(12,6),title=title,color='r',ls='--') 
ax.grid(True, linestyle='--',color='g');
#ax.fill_between(df['Date'],df['Close'],55,where=(df['Close']<55),facecolor='g',alpha=0.5)
ax.autoscale(axis='x',tight=True);
ax.xaxis.label.set_color('b')
ax.yaxis.label.set_color('b')
ax.set(xlabel=xlabel, ylabel=ylabel);

Skipped anotation, animation etc.

Advanced Plots

Subplot

In [104]:
import random
import matplotlib.pyplot as ptl

fig = plt.figure(figsize=(10,6)) # first define figure

def create_plot():
    xs=[]
    ys=[]
    
    for i in range(10):
        x=i
        y=random.randrange(8)
        
        xs.append(x)
        ys.append(y)
    return xs, ys

ax1=fig.add_subplot(4,2,1) # add_subplot(nrows, ncols, index:1, **kwargs)
ax2=fig.add_subplot(4,2,2) # add_subplot(nrows, ncols, index:2, **kwargs)
ax3=fig.add_subplot(4,2,3) # add_subplot(nrows, ncols, index:3, **kwargs)
ax4=fig.add_subplot(4,2,4) # add_subplot(nrows, ncols, index:3, **kwargs)

x,y = create_plot() # assigning x and y value
ax1.plot(x,y)
x,y = create_plot() # assigning x and y value
ax2.plot(x,y)
x,y = create_plot() # assigning x and y value
ax3.plot(x,y)
x,y = create_plot() # assigning x and y value
ax4.plot(x,y)

plt.show()
In [106]:
import random
import matplotlib.pyplot as ptl

fig = plt.figure(figsize=(10,6)) # first define figure

def create_plot():
    xs=[]
    ys=[]
    
    for i in range(10):
        x=i
        y=random.randrange(8)
        
        xs.append(x)
        ys.append(y)
    return xs, ys

ax1=fig.add_subplot(5,1,1) # add_subplot(nrows, ncols, index:1, **kwargs)
ax2=fig.add_subplot(5,1,2) # add_subplot(nrows, ncols, index:2, **kwargs)
ax3=fig.add_subplot(5,3,7) # add_subplot(nrows, ncols, index:3, **kwargs)
ax4=fig.add_subplot(5,3,8) # add_subplot(nrows, ncols, index:3, **kwargs)
ax5=fig.add_subplot(5,3,9) # add_subplot(nrows, ncols, index:3, **kwargs)

x,y = create_plot() # assigning x and y value
ax1.plot(x,y)
x,y = create_plot() # assigning x and y value
ax2.plot(x,y)
x,y = create_plot() # assigning x and y value
ax3.plot(x,y)
x,y = create_plot() # assigning x and y value
ax4.plot(x,y)
x,y = create_plot() # assigning x and y value
ax5.plot(x,y)

plt.show()

Untitled.jpg

Subplotgrid

In [107]:
import random
import matplotlib.pyplot as ptl

fig = plt.figure(figsize=(10,6)) # first define figure

def create_plot():
    xs=[]
    ys=[]
    
    for i in range(10):
        x=i
        y=random.randrange(8)
        
        xs.append(x)
        ys.append(y)
    return xs, ys

ax1=plt.subplot2grid((6,1),(0,0),rowspan=1,colspan=1)
ax2=plt.subplot2grid((6,1),(1,0),rowspan=4,colspan=1)
ax3=plt.subplot2grid((6,1),(5,0),rowspan=4,colspan=1)

x,y = create_plot() # assigning x and y value
ax1.plot(x,y)
x,y = create_plot() # assigning x and y value
ax2.plot(x,y)
x,y = create_plot() # assigning x and y value
ax3.plot(x,y)

plt.show()

Untitled.jpg

Geographical Plotting

Plotly

Intro

In [96]:
import plotly
plotly.__version__
Out[96]:
'4.4.1'
In [97]:
#plotly.offline doesn't push your charts to the clouds
import plotly.offline as pyo
#allows us to create the Data and Figure objects
from plotly.graph_objs import *
#plotly.plotly pushes your charts to the cloud  
import chart_studio.plotly.plotly as py

#pandas is a data analysis library
import pandas as pd
from pandas import DataFrame
In [98]:
pyo.offline.init_notebook_mode() # run at the start of every ipython 

Line Plot

Line Plot Basic

At a high level, the structure of the chart could be represented like this:

Figure = {'layout' : {<information about the chart's layout>}, 'data' : [{trace1}, {trace2}, {trace3}] }

Traces are a list of dictionaries which contain data and layouts are list of dictionary with information of chart

In [5]:
trace1 = {'type' : 'scatter',
        'x' : [0,1,2,3,4,5,6,7,8,9],
        'y' : [0,1,2,3,4,5,6,7,8,9],
        'name' : 'trace1',
        'mode' : 'lines'}

data = Data([trace1])

layout = {'title' : "My first plotly line chart",
         'xaxis' : {'title' : 'X Values'},
         'yaxis' : {'title' : 'Y Values'}}


fig = Figure(data = data, layout = layout)

pyo.iplot(fig)

Python's native dict.update() helps in updating the traces or layout

In [6]:
trace1
Out[6]:
{'type': 'scatter',
 'x': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
 'y': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
 'name': 'trace1',
 'mode': 'lines'}
In [7]:
updatedY = {'y' : [0,3,7,2,6,9,1,4,5,8]}
trace1.update(updatedY)
trace1
Out[7]:
{'type': 'scatter',
 'x': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
 'y': [0, 3, 7, 2, 6, 9, 1, 4, 5, 8],
 'name': 'trace1',
 'mode': 'lines'}
In [8]:
layout
Out[8]:
{'title': 'My first plotly line chart',
 'xaxis': {'title': 'X Values'},
 'yaxis': {'title': 'Y Values'}}
In [9]:
layout.update({'title' : 'My second Plotly line chart'})
In [10]:
layout
Out[10]:
{'title': 'My second Plotly line chart',
 'xaxis': {'title': 'X Values'},
 'yaxis': {'title': 'Y Values'}}
In [11]:
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)

Multiple chart using trace

In [12]:
# We can create multiple charts by changing the trace
expenseData = pd.read_csv("http://richard-muir.com/data/public/csv/NumberOfMPsExpenseClaims_2010-2015.csv")
In [13]:
expenseData.head()
Out[13]:
month NumberOfClaims2010 NumberOfClaims2011 NumberOfClaims2012 NumberOfClaims2013 NumberOfClaims2014 NumberOfClaims2015
0 1 14982 16594 18280 18096 15864 2728.0
1 2 14168 16060 16632 15954 14272 0.0
2 3 18678 19507 20934 21204 16946 1.0
3 4 3 13056 15158 18070 16146 5708.0
4 5 6404 15454 17693 17586 15725 11556.0
In [14]:
trace1 = {'type' : 'scatter',
         'mode' : 'lines',
         'name' : 'trace1',
         'x' : expenseData['month'],
         'y' : expenseData['NumberOfClaims2010']}


data = Data([trace1])

layout = {'title' : 'Expenses by month in 2010',
         'xaxis' : {'title' : 'Month'},
         'yaxis' : {'title' : 'Number of Claims'}}

fig = Figure(data = data, layout = layout)
pyo.iplot(fig)

Multiple traces

In [15]:
trace_2010 = {'type' : 'scatter',
             'x' : expenseData['month'],
             'y' : expenseData['NumberOfClaims2010'],
             'name' : '2010',
             'mode' : 'lines'}

trace_2011 = {'type' : 'scatter',
             'x' : expenseData['month'],
             'y' : expenseData['NumberOfClaims2011'],
             'name' : '2011',
             'mode' : 'lines'}

trace_2012 = {'type' : 'scatter',
             'x' : expenseData['month'],
             'y' : expenseData['NumberOfClaims2012'],
             'name' : '2012',
             'mode' : 'lines'}

data = Data([trace_2010, trace_2011, trace_2012])


layout = {'title' : 'Expense claims by month for 2010 - 2012',
         'xaxis' : {'title' : 'Month'},
         'yaxis' : {'title' : 'Number of expense claims'}}


fig = Figure(data = data, layout = layout)
pyo.iplot(fig)

Creating traces with a For Loop

In [16]:
traces = []
for i in range(2010, 2016):
    traces.append({'type' : 'scatter',
             'x' : expenseData['month'],
             'y' : expenseData['NumberOfClaims' + str(i)],
             'name' : i,
             'mode' : 'lines'})
traces
Out[16]:
[{'type': 'scatter', 'x': 0      1
  1      2
  2      3
  3      4
  4      5
  5      6
  6      7
  7      8
  8      9
  9     10
  10    11
  11    12
  Name: month, dtype: int64, 'y': 0     14982
  1     14168
  2     18678
  3         3
  4      6404
  5     11767
  6     14721
  7      8678
  8     14033
  9     15331
  10    17173
  11    12768
  Name: NumberOfClaims2010, dtype: int64, 'name': 2010, 'mode': 'lines'},
 {'type': 'scatter', 'x': 0      1
  1      2
  2      3
  3      4
  4      5
  5      6
  6      7
  7      8
  8      9
  9     10
  10    11
  11    12
  Name: month, dtype: int64, 'y': 0     16594
  1     16060
  2     19507
  3     13056
  4     15454
  5     17104
  6     15446
  7     11036
  8     14487
  9     17025
  10    18238
  11    14670
  Name: NumberOfClaims2011, dtype: int64, 'name': 2011, 'mode': 'lines'},
 {'type': 'scatter', 'x': 0      1
  1      2
  2      3
  3      4
  4      5
  5      6
  6      7
  7      8
  8      9
  9     10
  10    11
  11    12
  Name: month, dtype: int64, 'y': 0     18280
  1     16632
  2     20934
  3     15158
  4     17693
  5     16914
  6     16474
  7     11625
  8     15488
  9     16500
  10    19149
  11    14820
  Name: NumberOfClaims2012, dtype: int64, 'name': 2012, 'mode': 'lines'},
 {'type': 'scatter', 'x': 0      1
  1      2
  2      3
  3      4
  4      5
  5      6
  6      7
  7      8
  8      9
  9     10
  10    11
  11    12
  Name: month, dtype: int64, 'y': 0     18096
  1     15954
  2     21204
  3     18070
  4     17586
  5     18859
  6     17541
  7     12182
  8     15594
  9     18882
  10    18173
  11    15040
  Name: NumberOfClaims2013, dtype: int64, 'name': 2013, 'mode': 'lines'},
 {'type': 'scatter', 'x': 0      1
  1      2
  2      3
  3      4
  4      5
  5      6
  6      7
  7      8
  8      9
  9     10
  10    11
  11    12
  Name: month, dtype: int64, 'y': 0     15864
  1     14272
  2     16946
  3     16146
  4     15725
  5     17207
  6     16568
  7     10784
  8     14778
  9     15783
  10    15844
  11    13679
  Name: NumberOfClaims2014, dtype: int64, 'name': 2014, 'mode': 'lines'},
 {'type': 'scatter', 'x': 0      1
  1      2
  2      3
  3      4
  4      5
  5      6
  6      7
  7      8
  8      9
  9     10
  10    11
  11    12
  Name: month, dtype: int64, 'y': 0      2728.0
  1         0.0
  2         1.0
  3      5708.0
  4     11556.0
  5     17555.0
  6     15992.0
  7     10152.0
  8     14207.0
  9     14909.0
  10    15070.0
  11     9104.0
  Name: NumberOfClaims2015, dtype: float64, 'name': 2015, 'mode': 'lines'}]
In [17]:
data = Data(traces)
In [18]:
layout = {'title' : 'Expense claims by month for 2010-2015',
         'xaxis' : {'title' : 'month'},
         'yaxis' : {'title' : 'Number of expense claims'}}
In [19]:
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)

Loop through the column names in a DataFrame

In [20]:
for col in expenseData.columns.tolist():
    print(col)
month
NumberOfClaims2010
NumberOfClaims2011
NumberOfClaims2012
NumberOfClaims2013
NumberOfClaims2014
NumberOfClaims2015
In [21]:
for col in expenseData.columns.tolist():
    if col != 'month':
        print(col)
NumberOfClaims2010
NumberOfClaims2011
NumberOfClaims2012
NumberOfClaims2013
NumberOfClaims2014
NumberOfClaims2015
In [22]:
traces = []
for col in expenseData.columns.tolist():
    if col != 'month':
        traces.append({'type' : 'scatter',
                 'x' : expenseData['month'],
                 'y' : expenseData[col],
                 'name' : col,
                 'mode' : 'lines'})
traces
Out[22]:
[{'type': 'scatter', 'x': 0      1
  1      2
  2      3
  3      4
  4      5
  5      6
  6      7
  7      8
  8      9
  9     10
  10    11
  11    12
  Name: month, dtype: int64, 'y': 0     14982
  1     14168
  2     18678
  3         3
  4      6404
  5     11767
  6     14721
  7      8678
  8     14033
  9     15331
  10    17173
  11    12768
  Name: NumberOfClaims2010, dtype: int64, 'name': 'NumberOfClaims2010', 'mode': 'lines'},
 {'type': 'scatter', 'x': 0      1
  1      2
  2      3
  3      4
  4      5
  5      6
  6      7
  7      8
  8      9
  9     10
  10    11
  11    12
  Name: month, dtype: int64, 'y': 0     16594
  1     16060
  2     19507
  3     13056
  4     15454
  5     17104
  6     15446
  7     11036
  8     14487
  9     17025
  10    18238
  11    14670
  Name: NumberOfClaims2011, dtype: int64, 'name': 'NumberOfClaims2011', 'mode': 'lines'},
 {'type': 'scatter', 'x': 0      1
  1      2
  2      3
  3      4
  4      5
  5      6
  6      7
  7      8
  8      9
  9     10
  10    11
  11    12
  Name: month, dtype: int64, 'y': 0     18280
  1     16632
  2     20934
  3     15158
  4     17693
  5     16914
  6     16474
  7     11625
  8     15488
  9     16500
  10    19149
  11    14820
  Name: NumberOfClaims2012, dtype: int64, 'name': 'NumberOfClaims2012', 'mode': 'lines'},
 {'type': 'scatter', 'x': 0      1
  1      2
  2      3
  3      4
  4      5
  5      6
  6      7
  7      8
  8      9
  9     10
  10    11
  11    12
  Name: month, dtype: int64, 'y': 0     18096
  1     15954
  2     21204
  3     18070
  4     17586
  5     18859
  6     17541
  7     12182
  8     15594
  9     18882
  10    18173
  11    15040
  Name: NumberOfClaims2013, dtype: int64, 'name': 'NumberOfClaims2013', 'mode': 'lines'},
 {'type': 'scatter', 'x': 0      1
  1      2
  2      3
  3      4
  4      5
  5      6
  6      7
  7      8
  8      9
  9     10
  10    11
  11    12
  Name: month, dtype: int64, 'y': 0     15864
  1     14272
  2     16946
  3     16146
  4     15725
  5     17207
  6     16568
  7     10784
  8     14778
  9     15783
  10    15844
  11    13679
  Name: NumberOfClaims2014, dtype: int64, 'name': 'NumberOfClaims2014', 'mode': 'lines'},
 {'type': 'scatter', 'x': 0      1
  1      2
  2      3
  3      4
  4      5
  5      6
  6      7
  7      8
  8      9
  9     10
  10    11
  11    12
  Name: month, dtype: int64, 'y': 0      2728.0
  1         0.0
  2         1.0
  3      5708.0
  4     11556.0
  5     17555.0
  6     15992.0
  7     10152.0
  8     14207.0
  9     14909.0
  10    15070.0
  11     9104.0
  Name: NumberOfClaims2015, dtype: float64, 'name': 'NumberOfClaims2015', 'mode': 'lines'}]
In [23]:
layout = {'title' : 'Expense claims by month for 2010-2015',
         'xaxis' : {'title' : 'month'},
         'yaxis' : {'title' : 'Number of expense claims'}}
In [24]:
data = Data(traces)
fig = Figure(data = data, layout = layout)

pyo.iplot(fig)

Changing line colour & thickness

Plotly understands the standard CSS colour names, HEX codes or rgba codes. I'll use a combination of the CSS Colour Names and rgba in this course. The CSS colour names are really easy and intuitive to use and are helpful for quick examples, whilst using rgba allows us to set the opacity of a colour, as well as providing very fine control over the shade and hue.

To change the colour of a trace we need to add a 'marker' key to the trace. The value associated with the 'marker' key will be a dictionary which can contain a 'color' key:

trace = {'type' : 'scatter', 'marker' : {'color' : <CSS Color Name/HEX Code/RGB code/RGBA code/Variable>}

Approach 1 for color

In [25]:
traces = []
for i in range(2010, 2016):
    if i == 2015:
        colour = 'Red'
    else:
        colour = 'Grey'
    traces.append({'type' : 'scatter',
             'x' : expenseData['month'],
             'y' : expenseData['NumberOfClaims' + str(i)],
             'name' : i,
             'marker' : {'color' : colour},
             'mode' : 'lines'})
In [26]:
data = Data(traces)
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)

Approach 2 for color

Alternatively, we can define a function to return the colour which we want.

In [27]:
def chooseColour(yr):
    if yr == 2015:
        return 'Blue'
    else:
        return 'Grey'
In [28]:
traces = []
for i in range(2010, 2016):
    traces.append({'type' : 'scatter',
             'x' : expenseData['month'],
             'y' : expenseData['NumberOfClaims' + str(i)],
             'name' : i,
             'marker' : {'color' : chooseColour(i)},
             'mode' : 'lines'})
In [29]:
data = Data(traces)
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)

For width

trace = {'type' : 'scatter', 'line' : {'width' : <Float/Integer/Variable>}

Just like color we can create variable or function to do this.

In [30]:
traces = []
for i in range(2010, 2016):
    if i == 2015:
        width = 2
        colour = 'Red'
    else:
        width = 1
        colour = 'Grey'
    traces.append({'type' : 'scatter',
             'x' : expenseData['month'],
             'y' : expenseData['NumberOfClaims' + str(i)],
             'name' : i,
             'line' : {'width' : width},
             'marker' : {'color' : colour},
             'mode' : 'lines'})
In [31]:
data = Data(traces)
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)

Line Type

Solidity of a line can be changed by using 'dash' option in the 'line' dictionary in the trace:

trace = {'type' : 'scatter', 'line' : {'dash' : <Dash string/Dash length in pixels/Variable>}

Some valid dash strings are: 'solid' 'dash' 'dot' 'dashdot'

The enumerate() creates a variable which holds the value of each item's index in the list in addition to the variable which holds the actual value of the variable. This value starts at 0 for the first item and increments by one for each subsequent item.

In [32]:
dashes = ['dash', 'dot', 'dashdot']

for i, yr in enumerate(range(2010, 2016)):
    print(i, yr)
0 2010
1 2011
2 2012
3 2013
4 2014
5 2015
In [33]:
for i, yr in enumerate(range(2010, 2016)):
    print(dashes[(i//2)])
dash
dash
dot
dot
dashdot
dashdot
In [34]:
traces = []
for i, yr in enumerate(range(2010, 2016)):
    traces.append({'type' : 'scatter',
             'x' : expenseData['month'],
             'y' : expenseData['NumberOfClaims' + str(yr)],
             'line' : {'dash' : dashes[i//2]},
             'name' : yr,
             'mode' : 'lines'})
In [35]:
layout = {'title' : 'Expense claims by month for 2010-2015',
         'xaxis' : {'title' : 'month'},
         'yaxis' : {'title' : 'Number of expense claims'}}
In [36]:
data = Data(traces)
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)

Marker Symbols

'marker' sub-dictionary to change the colour of trace. To change the marker symbol, add a 'symbol' key to this dictionary

trace = {'type' : 'scatter', 'marker' : {'symbol' : <Marker symbol string/Variable>}

We need to tell Plotly that we want to use markers in addition to lines by changing the value for 'mode' from 'lines' to 'lines+markers':

trace = {'type' : 'scatter', 'mode' : 'lines+markers'}

In [37]:
markerSymbols = ['circle','square','diamond','x','triangle-up','cross']

traces = []
for i, yr in enumerate(range(2010, 2016)):
    traces.append({'type' : 'scatter',
             'x' : expenseData['month'],
             'y' : expenseData['NumberOfClaims' + str(yr)],
             'marker' : {'symbol' : markerSymbols[i]},
             'name' : yr,
             'mode' : 'lines+markers'})
data = Data(traces)
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
In [38]:
# With Open Option
In [39]:
markerSymbolsOpen = ['circle-open','square-open','diamond-open','x-open','triangle-up-open','cross-open']
traces = []
for i, yr in enumerate(range(2010, 2016)):
    traces.append({'type' : 'scatter',
             'x' : expenseData['month'],
             'y' : expenseData['NumberOfClaims' + str(yr)],
             'marker' : {'symbol' : markerSymbolsOpen[i]},
             'name' : yr,
             'mode' : 'lines+markers'})
data = Data(traces)
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
In [40]:
# Change Size
In [41]:
markerSymbolsDot = ['circle-dot','square-dot','diamond-dot','x-dot','triangle-up-dot','cross-dot']
traces = []
for i, yr in enumerate(range(2010, 2016)):
    traces.append({'type' : 'scatter',
             'x' : expenseData['month'],
             'y' : expenseData['NumberOfClaims' + str(yr)],
             'marker' : {'symbol' : markerSymbolsDot[i], 'size' : 10, 'line' : {'width' : 1}},
             'name' : yr,
             'mode' : 'markers'})
data = Data(traces)
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
In [42]:
markerSymbolsOpenDot = ['circle-open-dot','square-open-dot','diamond-open-dot','x-open-dot','triangle-up-open-dot','cross-open-dot']
traces = []
for i, yr in enumerate(range(2010, 2016)):
    traces.append({'type' : 'scatter',
             'x' : expenseData['month'],
             'y' : expenseData['NumberOfClaims' + str(yr)],
             'marker' : {'symbol' : markerSymbolsOpenDot[i], 'size' : 10, 'line' : {'width' : 1}},
             'name' : yr,
             'mode' : 'markers'})
data = Data(traces)
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)

Applying Smoothing

Before one apply smoothing to a line chart, one should consider very carefully why you are doing it, and what information one may lose by doing so. Essentially, plots data which actually doenot exist.

To smooth a line, we need to set the 'shape' option to 'spline' and pass a float value between 0 and 1.3 to the 'smoothing' option. Both of these options are inside the 'line' dictionary. Smoothing only works on scatterplots.

trace = {'type' : 'scatter', 'line' : {'smoothing' : <float between 0 and 1.3>, 'shape' : 'spline'}}

In [43]:
xVals = [1,2,3,4,5,6,7,8,9,10]
yVals = [11,15,8,21,14,28,19,10,5,20]

unSmoothTrace = {'type' : 'scatter',
               'x' : xVals,
               'y' : yVals,
               'mode' : 'lines',
               'line' : {'smoothing' : 0,
                        'shape' : 'spline'},
                'name' : 'UnSmooth'}

smoothTrace = {'type' : 'scatter',
               'x' : xVals,
               'y' : yVals,
               'mode' : 'lines',
               'line' : {'smoothing' : 1.3,
                        'shape' : 'spline'},
              'name' : 'Smooth'}

data = Data([unSmoothTrace, smoothTrace])

pyo.iplot(data)
In [44]:
updateSmoothTrace = {'smoothing' : 0.8}
smoothTrace['line'].update(updateSmoothTrace)
data = Data([unSmoothTrace, smoothTrace])

fig = Figure(data = data)
pyo.iplot(fig)
In [45]:
updateSmoothTrace = {'smoothing' : 0.3}
smoothTrace['line'].update(updateSmoothTrace)
data = Data([unSmoothTrace, smoothTrace])
fig = Figure(data = data)
pyo.iplot(fig)
In [46]:
from pandas_datareader import data
from scipy import signal

appleVals = data.get_data_yahoo('AAPL','1/1/2012','1/1/2013')
appleVals.head()
Out[46]:
High Low Open Close Volume Adj Close
Date
2012-01-03 58.928570 58.428570 58.485714 58.747143 75555200.0 51.115936
2012-01-04 59.240002 58.468571 58.571430 59.062859 65005500.0 51.390648
2012-01-05 59.792858 58.952858 59.278572 59.718571 67817400.0 51.961189
2012-01-06 60.392857 59.888573 59.967144 60.342857 79573200.0 52.504375
2012-01-09 61.107143 60.192856 60.785713 60.247143 98506100.0 52.421093
In [47]:
unSmoothApple = {'type' : 'scatter',
               'x' : appleVals.index,
               'y' : appleVals['Close']+50,
               'mode' : 'lines',
               'line' : {'smoothing' : 0,
                         'shape' : 'spline'},
               'name' : 'Apple (UnSmooth)'}

smoothApple = {'type' : 'scatter',
               'x' : appleVals.index,
               'y' : appleVals['Close'],
               'mode' : 'lines',
               'line' : {'smoothing' : 1.3,
                         'shape' : 'spline'},
              'name' : 'Apple (Smooth)'}

layout = {'title' : 'Stock closing prices for Apple in 2012',
         'xaxis' : {'title' : 'Date'},
         'yaxis' : {'title' : 'Closing Price ($)'}}

data = Data([unSmoothApple, smoothApple])

fig = Figure(data = data, layout = layout)

pyo.iplot(fig)
C:\Users\saurabhkumar9\AppData\Roaming\Python\Python37\site-packages\plotly\graph_objs\_deprecations.py:40: DeprecationWarning:

plotly.graph_objs.Data is deprecated.
Please replace it with a list or tuple of instances of the following types
  - plotly.graph_objs.Scatter
  - plotly.graph_objs.Bar
  - plotly.graph_objs.Area
  - plotly.graph_objs.Histogram
  - etc.


In [48]:
smoothApple.update({'y' : signal.savgol_filter(appleVals['Close'], 51, 3),})
unSmoothApple.update({'y' : appleVals['Close']})

data = Data([ smoothApple, unSmoothApple ])

fig = Figure(data = data, layout = layout)
pyo.iplot(fig)

Stepwise Lines

Step-wise line shapes are only useful for displaying data of a specific nature. These plots display the data as a series of horizontal and vertical steps, rather than a smooth curve (as with smoothing), or a series of diagonal lines (as is normally the case).

For this reason, stepped lines should only be used to display data where the points change at a specific place. For example, the temperature change over a day should not be displayed used a step-wise line as the temperature can change fractionally every minute. On the other hand, something like the Bank of England base rate could be displayed using a step-wise line. Each month the Bank's board meets to decide what the base rate will be for that month. In the time between these meetings, the base rate doesn't change.

Let's use the Bank of England base rate to see what different kinds of step-wise line charts we can create.

The possible step-wise line options and their effects are:

"hv" - marker point is at the start of the horizontal section of the step
"vh" - marker point is at the start of the vertical section of the step
"hvh" - marker point is at the middle of the horizontal section of the step
"vhv" - marker point is at the middle of the vertical section of the step
In [49]:
baserate = pd.read_csv("http://www.richard-muir.com/data/public/csv/BoEBaseRate.csv")
baserate.drop_duplicates(subset="VALUE", inplace=True)
baserate = baserate.tail(10)
baserate
Out[49]:
VALUE DATE
6767 4.50 04/10/2001
6792 4.00 08/11/2001
7105 3.75 06/02/2003
7211 3.50 10/07/2003
7419 4.25 06/05/2004
8560 3.00 06/11/2008
8580 2.00 04/12/2008
8602 1.50 08/01/2009
8622 1.00 05/02/2009
8642 0.50 05/03/2009
In [50]:
baseRateTrace = {'type' : 'scatter',
                 'x' : baserate['DATE'],
                 'y' : baserate['VALUE'],
                 'mode' : 'lines+markers',
                 'line' : {'shape' : 'hv'},
                 'name' : 'BoE Base Rate'}

layout = {'title' : 'Bank of England Base Rate, 2001 - 2009',
         'xaxis' : {'title' : 'Date'},
         'yaxis' : {'title' : 'Base Rate (%)'}}

data = Data([baseRateTrace])

fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
In [51]:
def updateLine(val):
    baseRateTrace['line'].update({'shape' : val})
    data = Data([baseRateTrace])
    fig = Figure(data = data, layout = layout)
    pyo.iplot(fig)
In [52]:
updateLine('linear')
In [53]:
updateLine('vh')
In [54]:
updateLine('hvh') #the line descends (or ascends) from the marker point until the y-value of the next marker point before moving horizontally to meet that point
In [55]:
updateLine('vhv') #point is in the middle of the horizontal portion of the step.

Area Plot

The 'fill' option is contained within the trace object. The possible values are:

"none" - No fill
"tozeroy" - Fills to y = 0 (vertical fill)
"tozerox" - Fills to x = 0 (horizontal fill)
"tonexty" - Fills between traces (vertically, to the trace before or to 0 if there is no previous trace)
"tonextx" - Fills between traces (horizontally, to the trace before or to 0 if there is no previous trace)
"toself"  - Connects the endpoints of a trace into a closed shape (useful for Kernel Density Plots)
"tonext"  - Fills the space between two plots if one completely encloses the other (useful for Kernel Density Plots again)
In [56]:
emissions = pd.read_csv("http://richard-muir.com/data/public/csv/TotalCo2EmissionsByCountry.csv", index_col=0)
emissions.head()
Out[56]:
Year Afghanistan | AFG Albania | ALB Algeria | DZA American Samoa | ASM Andorra | AND Angola | AGO Antigua and Barbuda | ATG Arab World | ARB Argentina | ARG ... Uzbekistan | UZB Vanuatu | VUT Venezuela, RB | VEN Vietnam | VNM Virgin Islands (U.S.) | VIR West Bank and Gaza | PSE World | WLD Yemen, Rep. | YEM Zambia | ZMB Zimbabwe | ZWE
0 1960 414.371 2024.184 6160.560 NaN NaN 550.050 36.670 59563.98922 48815.104 ... NaN NaN 57069.521 7491.681 NaN NaN 9.396706e+06 3633.997 NaN NaN
1 1961 491.378 2280.874 6065.218 NaN NaN 454.708 47.671 65151.09581 51180.319 ... NaN NaN 51928.387 7986.726 NaN NaN 9.434403e+06 2665.909 NaN NaN
2 1962 689.396 2464.224 5669.182 NaN NaN 1180.774 102.676 74357.70773 53695.881 ... NaN 40.337 54106.585 9347.183 NaN NaN 9.818840e+06 3887.020 NaN NaN
3 1963 707.731 2082.856 5427.160 NaN NaN 1151.438 84.341 87895.97916 50083.886 ... NaN 33.003 56204.109 9119.829 NaN NaN 1.035575e+07 2918.932 NaN NaN
4 1964 839.743 2016.850 5650.847 NaN NaN 1224.778 91.675 103196.28160 55727.399 ... NaN 62.339 56603.812 11800.406 NaN NaN 1.094701e+07 3633.997 3278.298 4473.74

5 rows × 249 columns

In [57]:
columnNames = emissions.columns.tolist()
columnNames
Out[57]:
['Year',
 'Afghanistan | AFG',
 'Albania | ALB',
 'Algeria | DZA',
 'American Samoa | ASM',
 'Andorra | AND',
 'Angola | AGO',
 'Antigua and Barbuda | ATG',
 'Arab World | ARB',
 'Argentina | ARG',
 'Armenia | ARM',
 'Aruba | ABW',
 'Australia | AUS',
 'Austria | AUT',
 'Azerbaijan | AZE',
 'Bahamas, The | BHS',
 'Bahrain | BHR',
 'Bangladesh | BGD',
 'Barbados | BRB',
 'Belarus | BLR',
 'Belgium | BEL',
 'Belize | BLZ',
 'Benin | BEN',
 'Bermuda | BMU',
 'Bhutan | BTN',
 'Bolivia | BOL',
 'Bosnia and Herzegovina | BIH',
 'Botswana | BWA',
 'Brazil | BRA',
 'Brunei Darussalam | BRN',
 'Bulgaria | BGR',
 'Burkina Faso | BFA',
 'Burundi | BDI',
 'Cabo Verde | CPV',
 'Cambodia | KHM',
 'Cameroon | CMR',
 'Canada | CAN',
 'Caribbean small states | CSS',
 'Cayman Islands | CYM',
 'Central African Republic | CAF',
 'Central Europe and the Baltics | CEB',
 'Chad | TCD',
 'Channel Islands | CHI',
 'Chile | CHL',
 'China | CHN',
 'Colombia | COL',
 'Comoros | COM',
 'Congo, Dem. Rep. | COD',
 'Congo, Rep. | COG',
 'Costa Rica | CRI',
 "Cote d'Ivoire | CIV",
 'Croatia | HRV',
 'Cuba | CUB',
 'Curacao | CUW',
 'Cyprus | CYP',
 'Czech Republic | CZE',
 'Denmark | DNK',
 'Djibouti | DJI',
 'Dominica | DMA',
 'Dominican Republic | DOM',
 'East Asia & Pacific (all income levels) | EAS',
 'East Asia & Pacific (developing only) | EAP',
 'Ecuador | ECU',
 'Egypt, Arab Rep. | EGY',
 'El Salvador | SLV',
 'Equatorial Guinea | GNQ',
 'Eritrea | ERI',
 'Estonia | EST',
 'Ethiopia | ETH',
 'Euro area | EMU',
 'Europe & Central Asia (all income levels) | ECS',
 'Europe & Central Asia (developing only) | ECA',
 'European Union | EUU',
 'Faroe Islands | FRO',
 'Fiji | FJI',
 'Finland | FIN',
 'Fragile and conflict affected situations | FCS',
 'France | FRA',
 'French Polynesia | PYF',
 'Gabon | GAB',
 'Gambia, The | GMB',
 'Georgia | GEO',
 'Germany | DEU',
 'Ghana | GHA',
 'Greece | GRC',
 'Greenland | GRL',
 'Grenada | GRD',
 'Guam | GUM',
 'Guatemala | GTM',
 'Guinea | GIN',
 'Guinea-Bissau | GNB',
 'Guyana | GUY',
 'Haiti | HTI',
 'Heavily indebted poor countries (HIPC) | HPC',
 'High income | HIC',
 'High income: OECD | OEC',
 'High income: nonOECD | NOC',
 'Honduras | HND',
 'Hong Kong SAR, China | HKG',
 'Hungary | HUN',
 'Iceland | ISL',
 'India | IND',
 'Indonesia | IDN',
 'Iran, Islamic Rep. | IRN',
 'Iraq | IRQ',
 'Ireland | IRL',
 'Isle of Man | IMN',
 'Israel | ISR',
 'Italy | ITA',
 'Jamaica | JAM',
 'Japan | JPN',
 'Jordan | JOR',
 'Kazakhstan | KAZ',
 'Kenya | KEN',
 'Kiribati | KIR',
 'Korea, Dem. People?s Rep. | PRK',
 'Korea, Rep. | KOR',
 'Kosovo | KSV',
 'Kuwait | KWT',
 'Kyrgyz Republic | KGZ',
 'Lao PDR | LAO',
 'Latin America & Caribbean (all income levels) | LCN',
 'Latin America & Caribbean (developing only) | LAC',
 'Latvia | LVA',
 'Least developed countries: UN classification | LDC',
 'Lebanon | LBN',
 'Lesotho | LSO',
 'Liberia | LBR',
 'Libya | LBY',
 'Liechtenstein | LIE',
 'Lithuania | LTU',
 'Low & middle income | LMY',
 'Low income | LIC',
 'Lower middle income | LMC',
 'Luxembourg | LUX',
 'Macao SAR, China | MAC',
 'Macedonia, FYR | MKD',
 'Madagascar | MDG',
 'Malawi | MWI',
 'Malaysia | MYS',
 'Maldives | MDV',
 'Mali | MLI',
 'Malta | MLT',
 'Marshall Islands | MHL',
 'Mauritania | MRT',
 'Mauritius | MUS',
 'Mexico | MEX',
 'Micronesia, Fed. Sts. | FSM',
 'Middle East & North Africa (all income levels) | MEA',
 'Middle East & North Africa (developing only) | MNA',
 'Middle income | MIC',
 'Moldova | MDA',
 'Monaco | MCO',
 'Mongolia | MNG',
 'Montenegro | MNE',
 'Morocco | MAR',
 'Mozambique | MOZ',
 'Myanmar | MMR',
 'Namibia | NAM',
 'Nepal | NPL',
 'Netherlands | NLD',
 'New Caledonia | NCL',
 'New Zealand | NZL',
 'Nicaragua | NIC',
 'Niger | NER',
 'Nigeria | NGA',
 'North America | NAC',
 'Northern Mariana Islands | MNP',
 'Norway | NOR',
 'Not classified | INX',
 'OECD members | OED',
 'Oman | OMN',
 'Other small states | OSS',
 'Pacific island small states | PSS',
 'Pakistan | PAK',
 'Palau | PLW',
 'Panama | PAN',
 'Papua New Guinea | PNG',
 'Paraguay | PRY',
 'Peru | PER',
 'Philippines | PHL',
 'Poland | POL',
 'Portugal | PRT',
 'Puerto Rico | PRI',
 'Qatar | QAT',
 'Romania | ROU',
 'Russian Federation | RUS',
 'Rwanda | RWA',
 'Samoa | WSM',
 'San Marino | SMR',
 'Sao Tome and Principe | STP',
 'Saudi Arabia | SAU',
 'Senegal | SEN',
 'Serbia | SRB',
 'Seychelles | SYC',
 'Sierra Leone | SLE',
 'Singapore | SGP',
 'Sint Maarten (Dutch part) | SXM',
 'Slovak Republic | SVK',
 'Slovenia | SVN',
 'Small states | SST',
 'Solomon Islands | SLB',
 'Somalia | SOM',
 'South Africa | ZAF',
 'South Asia | SAS',
 'South Sudan | SSD',
 'Spain | ESP',
 'Sri Lanka | LKA',
 'St. Kitts and Nevis | KNA',
 'St. Lucia | LCA',
 'St. Martin (French part) | MAF',
 'St. Vincent and the Grenadines | VCT',
 'Sub-Saharan Africa (all income levels) | SSF',
 'Sub-Saharan Africa (developing only) | SSA',
 'Sudan | SDN',
 'Suriname | SUR',
 'Swaziland | SWZ',
 'Sweden | SWE',
 'Switzerland | CHE',
 'Syrian Arab Republic | SYR',
 'Tajikistan | TJK',
 'Tanzania | TZA',
 'Thailand | THA',
 'Timor-Leste | TLS',
 'Togo | TGO',
 'Tonga | TON',
 'Trinidad and Tobago | TTO',
 'Tunisia | TUN',
 'Turkey | TUR',
 'Turkmenistan | TKM',
 'Turks and Caicos Islands | TCA',
 'Tuvalu | TUV',
 'Uganda | UGA',
 'Ukraine | UKR',
 'United Arab Emirates | ARE',
 'United Kingdom | GBR',
 'United States | USA',
 'Upper middle income | UMC',
 'Uruguay | URY',
 'Uzbekistan | UZB',
 'Vanuatu | VUT',
 'Venezuela, RB | VEN',
 'Vietnam | VNM',
 'Virgin Islands (U.S.) | VIR',
 'West Bank and Gaza | PSE',
 'World | WLD',
 'Yemen, Rep. | YEM',
 'Zambia | ZMB',
 'Zimbabwe | ZWE']
In [58]:
UKEmissions = {'type' : 'scatter',
              'x' : emissions['Year'],
              'y' : emissions['United Kingdom | GBR'],
              'mode' : 'lines',
              'name' : 'UK Co2 Emissions'}

layout = {'title' : "Co2 emissions in kilotons for the UK, 1960-2015",
         'xaxis' : {'title' : 'Year'},
         'yaxis' : {'title' : 'Co2 Emissions (kt)'}}

data = Data([UKEmissions])

fig = Figure(data=data, layout=layout)
pyo.iplot(fig)
In [59]:
UKEmissions.update({'fill' : 'tozeroy'})
data = Data([UKEmissions])

fig = Figure(data=data, layout=layout)
pyo.iplot(fig)
In [60]:
UKEmissions.update({'fillcolor' : 'rgba(89, 100, 212, 0.46)'})
data = Data([UKEmissions])

fig = Figure(data=data, layout=layout)

pyo.iplot(fig)
In [61]:
UKEmissions.update({'fill' : 'tozerox'})
data = Data([UKEmissions])

fig = Figure(data=data, layout=layout)

pyo.iplot(fig)
In [62]:
# x and y value switched
UKEmissions.update({'x' : emissions['United Kingdom | GBR'],
                   'y' : emissions['Year']})
data = Data([UKEmissions])

layout = {'title' : "Co2 emissions in kilotons for the UK, 1960-2015",
         'xaxis' : {'title' : 'Co2 Emissions (kt)'},
         'yaxis' : {'title' : 'Year'}}

fig = Figure(data=data, layout=layout)

pyo.iplot(fig)

Stacked Area Plots

Stacked area graphs are great at displaying how the share between different categories and the total of all categories changes over time.

When making a stacked area chart, the values for each additional trace are cumulative. That is, if Country A has 100Kt emissions, and Country B has 50Kt of emissions, the line for Country B must be drawn at 150Kt of emissions.

In [63]:
ColumnSelection = ['United Arab Emirates | ARE','United Kingdom | GBR', 
                   'United States | USA','China | CHN', 'India | IND','Year']

stackedAreaData = emissions.loc[:,(ColumnSelection)]
stackedAreaData.head(5)
Out[63]:
United Arab Emirates | ARE United Kingdom | GBR United States | USA China | CHN India | IND Year
0 11.001 584299.780 2890696.100 780726.302 120581.961 1960
1 11.001 588938.535 2880505.507 552066.850 130402.187 1961
2 18.335 593360.937 2987207.873 440359.029 143467.708 1962
3 22.002 603822.888 3119230.874 436695.696 154083.673 1963
4 18.335 608355.300 3255995.306 436923.050 150647.694 1964
In [64]:
UnitedArabEmirates = {'type' : 'scatter',
                      'x' : stackedAreaData['Year'],
                      'y' : stackedAreaData['United Arab Emirates | ARE'],
                      'mode' : 'lines',
                      'fill' : 'tonexty',
                      'name' : 'UAE Co2 Emissions'
                     }

UnitedKingdom = {     'type' : 'scatter',
                      'x' : stackedAreaData['Year'],
                      'y' : stackedAreaData['United Arab Emirates | ARE'] + stackedAreaData['United Kingdom | GBR'],
                      'mode' : 'lines',
                      'fill' : 'tonexty',
                      'name' : 'UK Co2 Emissions'
                     }

UnitedStates = {      'type' : 'scatter',
                      'x' : stackedAreaData['Year'],
                      'y' : stackedAreaData['United Arab Emirates | ARE'] + stackedAreaData['United Kingdom | GBR']
                            + stackedAreaData['United States | USA'],
                      'mode' : 'lines',
                      'fill' : 'tonexty',
                      'name' : 'USA Co2 Emissions'
                     }

China = {             'type' : 'scatter',
                      'x' : stackedAreaData['Year'],
                      'y' : stackedAreaData['United Arab Emirates | ARE'] + stackedAreaData['United Kingdom | GBR']
                            + stackedAreaData['United States | USA'] + stackedAreaData['China | CHN'],
                      'mode' : 'lines',
                      'fill' : 'tonexty',
                      'name' : 'China Co2 Emissions'
                     }

India = {             'type' : 'scatter',
                      'x' : stackedAreaData['Year'],
                      'y' : stackedAreaData['United Arab Emirates | ARE'] + stackedAreaData['United Kingdom | GBR']
                            + stackedAreaData['United States | USA'] + stackedAreaData['China | CHN'] 
                             + stackedAreaData['India | IND'],
                      'mode' : 'lines',
                      'fill' : 'tonexty',
                      'name' : 'India Co2 Emissions'
                     }


layout = {'title' : "Co2 emissions in kilotons, 1960-2011",
         'xaxis' : {'title' : 'Year'},
         'yaxis' : {'title' : 'Co2 Emissions (kt)'}}

data = Data([UnitedArabEmirates, UnitedKingdom, UnitedStates, China, India])

fig = Figure(data=data, layout=layout)

pyo.iplot(fig)

Stacked proportional area plots

Stacked area plot using the 'tonexty' option for the 'fill' option. This allowed us to compare the change in total emissions, as well as the change in each individual country's emissions over a period of time.

In this lesson we're going to create a stacked area plot which shows the percentage of total emissions that each country produced. The code for the chart will be almost identical to the previous lesson; the novelty in this lesson will be learning how to do a little data manipulation to get the data into percentages of total emissions, rather than just the raw figures

In [65]:
sumColumnSelection = ['United Arab Emirates | ARE','United Kingdom | GBR', 
                   'United States | USA','China | CHN', 'India | IND',]

stackedAreaData = emissions.loc[:,(['Year'] + sumColumnSelection)]
stackedAreaData.head()
Out[65]:
Year United Arab Emirates | ARE United Kingdom | GBR United States | USA China | CHN India | IND
0 1960 11.001 584299.780 2890696.100 780726.302 120581.961
1 1961 11.001 588938.535 2880505.507 552066.850 130402.187
2 1962 18.335 593360.937 2987207.873 440359.029 143467.708
3 1963 22.002 603822.888 3119230.874 436695.696 154083.673
4 1964 18.335 608355.300 3255995.306 436923.050 150647.694
In [66]:
stackedAreaData['Total'] = stackedAreaData[sumColumnSelection].sum(axis = 1)
stackedAreaData.head()
Out[66]:
Year United Arab Emirates | ARE United Kingdom | GBR United States | USA China | CHN India | IND Total
0 1960 11.001 584299.780 2890696.100 780726.302 120581.961 4376315.144
1 1961 11.001 588938.535 2880505.507 552066.850 130402.187 4151924.080
2 1962 18.335 593360.937 2987207.873 440359.029 143467.708 4164413.882
3 1963 22.002 603822.888 3119230.874 436695.696 154083.673 4313855.133
4 1964 18.335 608355.300 3255995.306 436923.050 150647.694 4451939.685
In [67]:
for country in sumColumnSelection:
    stackedAreaData["pc_"+str(country)] = stackedAreaData[country] / stackedAreaData['Total']
    
stackedAreaData.head()
Out[67]:
Year United Arab Emirates | ARE United Kingdom | GBR United States | USA China | CHN India | IND Total pc_United Arab Emirates | ARE pc_United Kingdom | GBR pc_United States | USA pc_China | CHN pc_India | IND
0 1960 11.001 584299.780 2890696.100 780726.302 120581.961 4376315.144 0.000003 0.133514 0.660532 0.178398 0.027553
1 1961 11.001 588938.535 2880505.507 552066.850 130402.187 4151924.080 0.000003 0.141847 0.693776 0.132967 0.031408
2 1962 18.335 593360.937 2987207.873 440359.029 143467.708 4164413.882 0.000004 0.142484 0.717318 0.105743 0.034451
3 1963 22.002 603822.888 3119230.874 436695.696 154083.673 4313855.133 0.000005 0.139973 0.723073 0.101231 0.035718
4 1964 18.335 608355.300 3255995.306 436923.050 150647.694 4451939.685 0.000004 0.136649 0.731366 0.098142 0.033839
In [68]:
sumColumnSelectionPC = ['pc_United Arab Emirates | ARE','pc_United Kingdom | GBR', 
                   'pc_United States | USA','pc_China | CHN', 'pc_India | IND',]
In [69]:
PCAreaData = stackedAreaData[sumColumnSelectionPC].cumsum(axis=1)
PCAreaData.head()
Out[69]:
pc_United Arab Emirates | ARE pc_United Kingdom | GBR pc_United States | USA pc_China | CHN pc_India | IND
0 0.000003 0.133517 0.794049 0.972447 1.0
1 0.000003 0.141850 0.835626 0.968592 1.0
2 0.000004 0.142488 0.859806 0.965549 1.0
3 0.000005 0.139978 0.863051 0.964282 1.0
4 0.000004 0.136654 0.868019 0.966161 1.0
In [70]:
PCAreaData['Year'] = stackedAreaData['Year']
PCAreaData.head()
Out[70]:
pc_United Arab Emirates | ARE pc_United Kingdom | GBR pc_United States | USA pc_China | CHN pc_India | IND Year
0 0.000003 0.133517 0.794049 0.972447 1.0 1960
1 0.000003 0.141850 0.835626 0.968592 1.0 1961
2 0.000004 0.142488 0.859806 0.965549 1.0 1962
3 0.000005 0.139978 0.863051 0.964282 1.0 1963
4 0.000004 0.136654 0.868019 0.966161 1.0 1964
In [71]:
traces = []

for col in PCAreaData.columns.tolist():
    if col != 'Year':
        traces.append({'type' : 'scatter',
                      'x' : PCAreaData['Year'],
                      'y' : PCAreaData[col],
                      'name' : col[3:-6],
                      'mode' : 'lines',
                      'fill' : 'tonexty'})
traces
Out[71]:
[{'type': 'scatter', 'x': 0     1960
  1     1961
  2     1962
  3     1963
  4     1964
  5     1965
  6     1966
  7     1967
  8     1968
  9     1969
  10    1970
  11    1971
  12    1972
  13    1973
  14    1974
  15    1975
  16    1976
  17    1977
  18    1978
  19    1979
  20    1980
  21    1981
  22    1982
  23    1983
  24    1984
  25    1985
  26    1986
  27    1987
  28    1988
  29    1989
  30    1990
  31    1991
  32    1992
  33    1993
  34    1994
  35    1995
  36    1996
  37    1997
  38    1998
  39    1999
  40    2000
  41    2001
  42    2002
  43    2003
  44    2004
  45    2005
  46    2006
  47    2007
  48    2008
  49    2009
  50    2010
  51    2011
  Name: Year, dtype: int64, 'y': 0     0.000003
  1     0.000003
  2     0.000004
  3     0.000005
  4     0.000004
  5     0.000005
  6     0.000005
  7     0.000187
  8     0.000244
  9     0.003771
  10    0.002557
  11    0.003461
  12    0.003673
  13    0.004603
  14    0.004844
  15    0.004825
  16    0.005908
  17    0.005531
  18    0.006121
  19    0.004940
  20    0.005158
  21    0.005296
  22    0.005366
  23    0.005033
  24    0.006344
  25    0.006605
  26    0.006129
  27    0.005903
  28    0.005699
  29    0.006290
  30    0.006060
  31    0.006502
  32    0.006456
  33    0.007066
  34    0.007585
  35    0.007073
  36    0.003979
  37    0.003982
  38    0.007821
  39    0.007406
  40    0.010282
  41    0.009270
  42    0.007575
  43    0.008807
  44    0.008676
  45    0.008506
  46    0.008670
  47    0.009378
  48    0.010504
  49    0.010445
  50    0.010297
  51    0.010483
  Name: pc_United Arab Emirates | ARE, dtype: float64, 'name': 'United Arab Emirates', 'mode': 'lines', 'fill': 'tonexty'},
 {'type': 'scatter', 'x': 0     1960
  1     1961
  2     1962
  3     1963
  4     1964
  5     1965
  6     1966
  7     1967
  8     1968
  9     1969
  10    1970
  11    1971
  12    1972
  13    1973
  14    1974
  15    1975
  16    1976
  17    1977
  18    1978
  19    1979
  20    1980
  21    1981
  22    1982
  23    1983
  24    1984
  25    1985
  26    1986
  27    1987
  28    1988
  29    1989
  30    1990
  31    1991
  32    1992
  33    1993
  34    1994
  35    1995
  36    1996
  37    1997
  38    1998
  39    1999
  40    2000
  41    2001
  42    2002
  43    2003
  44    2004
  45    2005
  46    2006
  47    2007
  48    2008
  49    2009
  50    2010
  51    2011
  Name: Year, dtype: int64, 'y': 0     0.133517
  1     0.141850
  2     0.142488
  3     0.139978
  4     0.136654
  5     0.133743
  6     0.126890
  7     0.121242
  8     0.119355
  9     0.119326
  10    0.112058
  11    0.111422
  12    0.105216
  13    0.103801
  14    0.100339
  15    0.098576
  16    0.095143
  17    0.091778
  18    0.088772
  19    0.091969
  20    0.086119
  21    0.085876
  22    0.085200
  23    0.082748
  24    0.078728
  25    0.080666
  26    0.079936
  27    0.076686
  28    0.072914
  29    0.073442
  30    0.070829
  31    0.071142
  32    0.068220
  33    0.065493
  34    0.064279
  35    0.060941
  36    0.057392
  37    0.054380
  38    0.058860
  39    0.057573
  40    0.059770
  41    0.059166
  42    0.054714
  43    0.053196
  44    0.049981
  45    0.048235
  46    0.046560
  47    0.044917
  48    0.044969
  49    0.040843
  50    0.040536
  51    0.036809
  Name: pc_United Kingdom | GBR, dtype: float64, 'name': 'United Kingdom', 'mode': 'lines', 'fill': 'tonexty'},
 {'type': 'scatter', 'x': 0     1960
  1     1961
  2     1962
  3     1963
  4     1964
  5     1965
  6     1966
  7     1967
  8     1968
  9     1969
  10    1970
  11    1971
  12    1972
  13    1973
  14    1974
  15    1975
  16    1976
  17    1977
  18    1978
  19    1979
  20    1980
  21    1981
  22    1982
  23    1983
  24    1984
  25    1985
  26    1986
  27    1987
  28    1988
  29    1989
  30    1990
  31    1991
  32    1992
  33    1993
  34    1994
  35    1995
  36    1996
  37    1997
  38    1998
  39    1999
  40    2000
  41    2001
  42    2002
  43    2003
  44    2004
  45    2005
  46    2006
  47    2007
  48    2008
  49    2009
  50    2010
  51    2011
  Name: Year, dtype: int64, 'y': 0     0.794049
  1     0.835626
  2     0.859806
  3     0.863051
  4     0.868019
  5     0.862111
  6     0.857528
  7     0.876298
  8     0.871215
  9     0.858885
  10    0.837900
  11    0.823159
  12    0.820018
  13    0.820720
  14    0.811361
  15    0.782910
  16    0.782471
  17    0.768107
  18    0.756835
  19    0.753471
  20    0.746229
  21    0.737590
  22    0.712017
  23    0.701046
  24    0.690683
  25    0.674970
  26    0.663285
  27    0.656987
  28    0.649376
  29    0.645439
  30    0.632833
  31    0.621103
  32    0.613593
  33    0.604497
  34    0.593090
  35    0.575413
  36    0.567239
  37    0.568395
  38    0.578116
  39    0.578303
  40    0.580578
  41    0.571173
  42    0.559897
  43    0.521328
  44    0.491540
  45    0.472635
  46    0.445769
  47    0.434737
  48    0.416544
  49    0.379623
  50    0.372846
  51    0.348422
  Name: pc_United States | USA, dtype: float64, 'name': 'United States', 'mode': 'lines', 'fill': 'tonexty'},
 {'type': 'scatter', 'x': 0     1960
  1     1961
  2     1962
  3     1963
  4     1964
  5     1965
  6     1966
  7     1967
  8     1968
  9     1969
  10    1970
  11    1971
  12    1972
  13    1973
  14    1974
  15    1975
  16    1976
  17    1977
  18    1978
  19    1979
  20    1980
  21    1981
  22    1982
  23    1983
  24    1984
  25    1985
  26    1986
  27    1987
  28    1988
  29    1989
  30    1990
  31    1991
  32    1992
  33    1993
  34    1994
  35    1995
  36    1996
  37    1997
  38    1998
  39    1999
  40    2000
  41    2001
  42    2002
  43    2003
  44    2004
  45    2005
  46    2006
  47    2007
  48    2008
  49    2009
  50    2010
  51    2011
  Name: Year, dtype: int64, 'y': 0     0.972447
  1     0.968592
  2     0.965549
  3     0.964282
  4     0.966161
  5     0.964349
  6     0.964766
  7     0.964811
  8     0.963237
  9     0.964954
  10    0.967280
  11    0.966369
  12    0.965888
  13    0.966283
  14    0.964129
  15    0.960831
  16    0.960697
  17    0.954979
  18    0.956558
  19    0.955204
  20    0.951283
  21    0.946145
  22    0.942013
  23    0.938436
  24    0.938860
  25    0.935118
  26    0.931762
  27    0.930493
  28    0.928563
  29    0.923472
  30    0.919539
  31    0.915853
  32    0.912978
  33    0.912792
  34    0.910290
  35    0.907875
  36    0.902868
  37    0.900172
  38    0.897135
  39    0.891856
  40    0.891609
  41    0.889960
  42    0.890284
  43    0.894333
  44    0.896685
  45    0.896658
  46    0.894711
  47    0.891616
  48    0.881500
  49    0.873727
  50    0.880138
  51    0.878167
  Name: pc_China | CHN, dtype: float64, 'name': 'China', 'mode': 'lines', 'fill': 'tonexty'},
 {'type': 'scatter', 'x': 0     1960
  1     1961
  2     1962
  3     1963
  4     1964
  5     1965
  6     1966
  7     1967
  8     1968
  9     1969
  10    1970
  11    1971
  12    1972
  13    1973
  14    1974
  15    1975
  16    1976
  17    1977
  18    1978
  19    1979
  20    1980
  21    1981
  22    1982
  23    1983
  24    1984
  25    1985
  26    1986
  27    1987
  28    1988
  29    1989
  30    1990
  31    1991
  32    1992
  33    1993
  34    1994
  35    1995
  36    1996
  37    1997
  38    1998
  39    1999
  40    2000
  41    2001
  42    2002
  43    2003
  44    2004
  45    2005
  46    2006
  47    2007
  48    2008
  49    2009
  50    2010
  51    2011
  Name: Year, dtype: int64, 'y': 0     1.0
  1     1.0
  2     1.0
  3     1.0
  4     1.0
  5     1.0
  6     1.0
  7     1.0
  8     1.0
  9     1.0
  10    1.0
  11    1.0
  12    1.0
  13    1.0
  14    1.0
  15    1.0
  16    1.0
  17    1.0
  18    1.0
  19    1.0
  20    1.0
  21    1.0
  22    1.0
  23    1.0
  24    1.0
  25    1.0
  26    1.0
  27    1.0
  28    1.0
  29    1.0
  30    1.0
  31    1.0
  32    1.0
  33    1.0
  34    1.0
  35    1.0
  36    1.0
  37    1.0
  38    1.0
  39    1.0
  40    1.0
  41    1.0
  42    1.0
  43    1.0
  44    1.0
  45    1.0
  46    1.0
  47    1.0
  48    1.0
  49    1.0
  50    1.0
  51    1.0
  Name: pc_India | IND, dtype: float64, 'name': 'India', 'mode': 'lines', 'fill': 'tonexty'}]
In [72]:
data = Data(traces)
layout = {'title' : "Proportion of Co2 Emissions, 1960-2011",
         'xaxis' : {'title' : 'Year'},
         'yaxis' : {'title' : 'Proprtion of Co2 Emissions'}}
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)

Proportion using Function

In [73]:
def createStackedPropArea(df, time, cols, title, yaxisTitle):
    """
    A function which manipulates the data into the correct format to produce a stacked proportional area plot with Plotly.
    
    Takes five arguments:
    
    df - a pandas DataFrame
    time - the time element of the data, must be a column in the DataFrame as a string
    cols - the name of the columns in the DataFrame which you want to include in the area plot as list
    title - the title of the chart
    yaxisTitle - the yaxis title of the chart (the xaxis title comes from the time variable)
    """
In [74]:
def createStackedPropArea(df, time, cols, title, yaxisTitle):  
    stackedAreaDF = df.loc[:, ([time] + cols)]
    stackedAreaDF.fillna(0, inplace=True)
        
    return stackedAreaDF
   
test = createStackedPropArea(emissions, 'Year', ['United Arab Emirates | ARE','United Kingdom | GBR', 
                   'United States | USA','China | CHN', 'India | IND', 'Andorra | AND'],
                            "Proportion of Co2 Emissions, 1960-2015", 'Proprtion of Co2 Emissions')
test.head()
Out[74]:
Year United Arab Emirates | ARE United Kingdom | GBR United States | USA China | CHN India | IND Andorra | AND
0 1960 11.001 584299.780 2890696.100 780726.302 120581.961 0.0
1 1961 11.001 588938.535 2880505.507 552066.850 130402.187 0.0
2 1962 18.335 593360.937 2987207.873 440359.029 143467.708 0.0
3 1963 22.002 603822.888 3119230.874 436695.696 154083.673 0.0
4 1964 18.335 608355.300 3255995.306 436923.050 150647.694 0.0
In [75]:
def createStackedPropArea(df, time, cols, title, yaxisTitle):   
    stackedAreaDF = df.loc[:, ([time] + cols)]
    stackedAreaDF.fillna(0, inplace=True)
        
    stackedAreaDF['Total'] = stackedAreaDF[cols].sum(axis =1)
    
    return stackedAreaDF
       
test = createStackedPropArea(emissions, 'Year', ['United Arab Emirates | ARE','United Kingdom | GBR', 
                   'United States | USA','China | CHN', 'India | IND',],
                            "Proportion of Co2 Emissions, 1960-2015", 'Proprtion of Co2 Emissions')
test.head()
Out[75]:
Year United Arab Emirates | ARE United Kingdom | GBR United States | USA China | CHN India | IND Total
0 1960 11.001 584299.780 2890696.100 780726.302 120581.961 4376315.144
1 1961 11.001 588938.535 2880505.507 552066.850 130402.187 4151924.080
2 1962 18.335 593360.937 2987207.873 440359.029 143467.708 4164413.882
3 1963 22.002 603822.888 3119230.874 436695.696 154083.673 4313855.133
4 1964 18.335 608355.300 3255995.306 436923.050 150647.694 4451939.685
In [76]:
def createStackedPropArea(df, time, cols, title, yaxisTitle):
    PCcols = []
    
    stackedAreaDF = df.loc[:, ([time] + cols)]
    stackedAreaDF.fillna(0, inplace=True)
        
    stackedAreaDF['Total'] = stackedAreaDF[cols].sum(axis =1)

    for col in cols:
        stackedAreaDF["pc_"+str(col)] = stackedAreaDF[col] / stackedAreaDF['Total']
        PCcols.append("pc_"+str(col))
   
    return stackedAreaDF
    
test = createStackedPropArea(emissions, 'Year', ['United Arab Emirates | ARE','United Kingdom | GBR', 
                   'United States | USA','China | CHN', 'India | IND',],
                            "Proportion of Co2 Emissions, 1960-2015", 'Proprtion of Co2 Emissions')
test.head()
Out[76]:
Year United Arab Emirates | ARE United Kingdom | GBR United States | USA China | CHN India | IND Total pc_United Arab Emirates | ARE pc_United Kingdom | GBR pc_United States | USA pc_China | CHN pc_India | IND
0 1960 11.001 584299.780 2890696.100 780726.302 120581.961 4376315.144 0.000003 0.133514 0.660532 0.178398 0.027553
1 1961 11.001 588938.535 2880505.507 552066.850 130402.187 4151924.080 0.000003 0.141847 0.693776 0.132967 0.031408
2 1962 18.335 593360.937 2987207.873 440359.029 143467.708 4164413.882 0.000004 0.142484 0.717318 0.105743 0.034451
3 1963 22.002 603822.888 3119230.874 436695.696 154083.673 4313855.133 0.000005 0.139973 0.723073 0.101231 0.035718
4 1964 18.335 608355.300 3255995.306 436923.050 150647.694 4451939.685 0.000004 0.136649 0.731366 0.098142 0.033839
In [77]:
def createStackedPropArea(df, time, cols, title, yaxisTitle):
    PCcols = []
    
    stackedAreaDF = df.loc[:, ([time] + cols)]
    stackedAreaDF.fillna(0, inplace=True)
        
    stackedAreaDF['Total'] = stackedAreaDF[cols].sum(axis =1)
    
    
    
    for col in cols:
        stackedAreaDF["pc_"+str(col)] = stackedAreaDF[col] / stackedAreaDF['Total']
        PCcols.append("pc_"+str(col))
        
    stackedPCAreaDF = stackedAreaDF[PCcols].cumsum(axis=1)
    stackedPCAreaDF[time] = stackedAreaDF[time]
    
    
    return stackedPCAreaDF
    
test = createStackedPropArea(emissions, 'Year', ['United Arab Emirates | ARE','United Kingdom | GBR', 
                   'United States | USA','China | CHN', 'India | IND',],
                            "Proportion of Co2 Emissions, 1960-2015", 'Proprtion of Co2 Emissions')
test.head()
Out[77]:
pc_United Arab Emirates | ARE pc_United Kingdom | GBR pc_United States | USA pc_China | CHN pc_India | IND Year
0 0.000003 0.133517 0.794049 0.972447 1.0 1960
1 0.000003 0.141850 0.835626 0.968592 1.0 1961
2 0.000004 0.142488 0.859806 0.965549 1.0 1962
3 0.000005 0.139978 0.863051 0.964282 1.0 1963
4 0.000004 0.136654 0.868019 0.966161 1.0 1964
In [78]:
def createStackedPropArea(df, time, cols, title, yaxisTitle):  
    
    PCcols = []
    traces = []
    
    stackedAreaDF = df.loc[:, ([time] + cols)]
        
    stackedAreaDF['Total'] = stackedAreaDF[cols].sum(axis =1)
    stackedAreaDF.fillna(0, inplace=True)

    for col in cols:
        stackedAreaDF["pc_"+str(col)] = stackedAreaDF[col] / stackedAreaDF['Total']
        PCcols.append("pc_"+str(col))
        
    stackedPCAreaDF = stackedAreaDF[PCcols].cumsum(axis=1)
    stackedPCAreaDF[time] = stackedAreaDF[time]

    for col in PCcols:
        traces.append({'type' : 'scatter',
                     'x' : stackedPCAreaDF[time],
                     'y' : stackedPCAreaDF[col],
                      'name' : col[3:-6],
                      'mode' : 'lines',
                      'fill' : 'tonexty'})
    
    return traces
  
test = createStackedPropArea(emissions, 'Year', ['United Arab Emirates | ARE','United Kingdom | GBR', 
                   'United States | USA','China | CHN', 'India | IND',],
                            "Proportion of Co2 Emissions, 1960-2015", 'Proprtion of Co2 Emissions')
test
Out[78]:
[{'type': 'scatter', 'x': 0     1960
  1     1961
  2     1962
  3     1963
  4     1964
  5     1965
  6     1966
  7     1967
  8     1968
  9     1969
  10    1970
  11    1971
  12    1972
  13    1973
  14    1974
  15    1975
  16    1976
  17    1977
  18    1978
  19    1979
  20    1980
  21    1981
  22    1982
  23    1983
  24    1984
  25    1985
  26    1986
  27    1987
  28    1988
  29    1989
  30    1990
  31    1991
  32    1992
  33    1993
  34    1994
  35    1995
  36    1996
  37    1997
  38    1998
  39    1999
  40    2000
  41    2001
  42    2002
  43    2003
  44    2004
  45    2005
  46    2006
  47    2007
  48    2008
  49    2009
  50    2010
  51    2011
  Name: Year, dtype: int64, 'y': 0     0.000003
  1     0.000003
  2     0.000004
  3     0.000005
  4     0.000004
  5     0.000005
  6     0.000005
  7     0.000187
  8     0.000244
  9     0.003771
  10    0.002557
  11    0.003461
  12    0.003673
  13    0.004603
  14    0.004844
  15    0.004825
  16    0.005908
  17    0.005531
  18    0.006121
  19    0.004940
  20    0.005158
  21    0.005296
  22    0.005366
  23    0.005033
  24    0.006344
  25    0.006605
  26    0.006129
  27    0.005903
  28    0.005699
  29    0.006290
  30    0.006060
  31    0.006502
  32    0.006456
  33    0.007066
  34    0.007585
  35    0.007073
  36    0.003979
  37    0.003982
  38    0.007821
  39    0.007406
  40    0.010282
  41    0.009270
  42    0.007575
  43    0.008807
  44    0.008676
  45    0.008506
  46    0.008670
  47    0.009378
  48    0.010504
  49    0.010445
  50    0.010297
  51    0.010483
  Name: pc_United Arab Emirates | ARE, dtype: float64, 'name': 'United Arab Emirates', 'mode': 'lines', 'fill': 'tonexty'},
 {'type': 'scatter', 'x': 0     1960
  1     1961
  2     1962
  3     1963
  4     1964
  5     1965
  6     1966
  7     1967
  8     1968
  9     1969
  10    1970
  11    1971
  12    1972
  13    1973
  14    1974
  15    1975
  16    1976
  17    1977
  18    1978
  19    1979
  20    1980
  21    1981
  22    1982
  23    1983
  24    1984
  25    1985
  26    1986
  27    1987
  28    1988
  29    1989
  30    1990
  31    1991
  32    1992
  33    1993
  34    1994
  35    1995
  36    1996
  37    1997
  38    1998
  39    1999
  40    2000
  41    2001
  42    2002
  43    2003
  44    2004
  45    2005
  46    2006
  47    2007
  48    2008
  49    2009
  50    2010
  51    2011
  Name: Year, dtype: int64, 'y': 0     0.133517
  1     0.141850
  2     0.142488
  3     0.139978
  4     0.136654
  5     0.133743
  6     0.126890
  7     0.121242
  8     0.119355
  9     0.119326
  10    0.112058
  11    0.111422
  12    0.105216
  13    0.103801
  14    0.100339
  15    0.098576
  16    0.095143
  17    0.091778
  18    0.088772
  19    0.091969
  20    0.086119
  21    0.085876
  22    0.085200
  23    0.082748
  24    0.078728
  25    0.080666
  26    0.079936
  27    0.076686
  28    0.072914
  29    0.073442
  30    0.070829
  31    0.071142
  32    0.068220
  33    0.065493
  34    0.064279
  35    0.060941
  36    0.057392
  37    0.054380
  38    0.058860
  39    0.057573
  40    0.059770
  41    0.059166
  42    0.054714
  43    0.053196
  44    0.049981
  45    0.048235
  46    0.046560
  47    0.044917
  48    0.044969
  49    0.040843
  50    0.040536
  51    0.036809
  Name: pc_United Kingdom | GBR, dtype: float64, 'name': 'United Kingdom', 'mode': 'lines', 'fill': 'tonexty'},
 {'type': 'scatter', 'x': 0     1960
  1     1961
  2     1962
  3     1963
  4     1964
  5     1965
  6     1966
  7     1967
  8     1968
  9     1969
  10    1970
  11    1971
  12    1972
  13    1973
  14    1974
  15    1975
  16    1976
  17    1977
  18    1978
  19    1979
  20    1980
  21    1981
  22    1982
  23    1983
  24    1984
  25    1985
  26    1986
  27    1987
  28    1988
  29    1989
  30    1990
  31    1991
  32    1992
  33    1993
  34    1994
  35    1995
  36    1996
  37    1997
  38    1998
  39    1999
  40    2000
  41    2001
  42    2002
  43    2003
  44    2004
  45    2005
  46    2006
  47    2007
  48    2008
  49    2009
  50    2010
  51    2011
  Name: Year, dtype: int64, 'y': 0     0.794049
  1     0.835626
  2     0.859806
  3     0.863051
  4     0.868019
  5     0.862111
  6     0.857528
  7     0.876298
  8     0.871215
  9     0.858885
  10    0.837900
  11    0.823159
  12    0.820018
  13    0.820720
  14    0.811361
  15    0.782910
  16    0.782471
  17    0.768107
  18    0.756835
  19    0.753471
  20    0.746229
  21    0.737590
  22    0.712017
  23    0.701046
  24    0.690683
  25    0.674970
  26    0.663285
  27    0.656987
  28    0.649376
  29    0.645439
  30    0.632833
  31    0.621103
  32    0.613593
  33    0.604497
  34    0.593090
  35    0.575413
  36    0.567239
  37    0.568395
  38    0.578116
  39    0.578303
  40    0.580578
  41    0.571173
  42    0.559897
  43    0.521328
  44    0.491540
  45    0.472635
  46    0.445769
  47    0.434737
  48    0.416544
  49    0.379623
  50    0.372846
  51    0.348422
  Name: pc_United States | USA, dtype: float64, 'name': 'United States', 'mode': 'lines', 'fill': 'tonexty'},
 {'type': 'scatter', 'x': 0     1960
  1     1961
  2     1962
  3     1963
  4     1964
  5     1965
  6     1966
  7     1967
  8     1968
  9     1969
  10    1970
  11    1971
  12    1972
  13    1973
  14    1974
  15    1975
  16    1976
  17    1977
  18    1978
  19    1979
  20    1980
  21    1981
  22    1982
  23    1983
  24    1984
  25    1985
  26    1986
  27    1987
  28    1988
  29    1989
  30    1990
  31    1991
  32    1992
  33    1993
  34    1994
  35    1995
  36    1996
  37    1997
  38    1998
  39    1999
  40    2000
  41    2001
  42    2002
  43    2003
  44    2004
  45    2005
  46    2006
  47    2007
  48    2008
  49    2009
  50    2010
  51    2011
  Name: Year, dtype: int64, 'y': 0     0.972447
  1     0.968592
  2     0.965549
  3     0.964282
  4     0.966161
  5     0.964349
  6     0.964766
  7     0.964811
  8     0.963237
  9     0.964954
  10    0.967280
  11    0.966369
  12    0.965888
  13    0.966283
  14    0.964129
  15    0.960831
  16    0.960697
  17    0.954979
  18    0.956558
  19    0.955204
  20    0.951283
  21    0.946145
  22    0.942013
  23    0.938436
  24    0.938860
  25    0.935118
  26    0.931762
  27    0.930493
  28    0.928563
  29    0.923472
  30    0.919539
  31    0.915853
  32    0.912978
  33    0.912792
  34    0.910290
  35    0.907875
  36    0.902868
  37    0.900172
  38    0.897135
  39    0.891856
  40    0.891609
  41    0.889960
  42    0.890284
  43    0.894333
  44    0.896685
  45    0.896658
  46    0.894711
  47    0.891616
  48    0.881500
  49    0.873727
  50    0.880138
  51    0.878167
  Name: pc_China | CHN, dtype: float64, 'name': 'China', 'mode': 'lines', 'fill': 'tonexty'},
 {'type': 'scatter', 'x': 0     1960
  1     1961
  2     1962
  3     1963
  4     1964
  5     1965
  6     1966
  7     1967
  8     1968
  9     1969
  10    1970
  11    1971
  12    1972
  13    1973
  14    1974
  15    1975
  16    1976
  17    1977
  18    1978
  19    1979
  20    1980
  21    1981
  22    1982
  23    1983
  24    1984
  25    1985
  26    1986
  27    1987
  28    1988
  29    1989
  30    1990
  31    1991
  32    1992
  33    1993
  34    1994
  35    1995
  36    1996
  37    1997
  38    1998
  39    1999
  40    2000
  41    2001
  42    2002
  43    2003
  44    2004
  45    2005
  46    2006
  47    2007
  48    2008
  49    2009
  50    2010
  51    2011
  Name: Year, dtype: int64, 'y': 0     1.0
  1     1.0
  2     1.0
  3     1.0
  4     1.0
  5     1.0
  6     1.0
  7     1.0
  8     1.0
  9     1.0
  10    1.0
  11    1.0
  12    1.0
  13    1.0
  14    1.0
  15    1.0
  16    1.0
  17    1.0
  18    1.0
  19    1.0
  20    1.0
  21    1.0
  22    1.0
  23    1.0
  24    1.0
  25    1.0
  26    1.0
  27    1.0
  28    1.0
  29    1.0
  30    1.0
  31    1.0
  32    1.0
  33    1.0
  34    1.0
  35    1.0
  36    1.0
  37    1.0
  38    1.0
  39    1.0
  40    1.0
  41    1.0
  42    1.0
  43    1.0
  44    1.0
  45    1.0
  46    1.0
  47    1.0
  48    1.0
  49    1.0
  50    1.0
  51    1.0
  Name: pc_India | IND, dtype: float64, 'name': 'India', 'mode': 'lines', 'fill': 'tonexty'}]
In [79]:
def createStackedPropArea(df, time, cols, title, yaxisTitle): 
    """
    A function which manipulates the data into the correct format to produce a stacked proportional area plot with Plotly.
    Takes five arguments:
    df - a pandas DataFrame
    time - the time element of the data, must be a column in the DataFrame
    cols - the name of the columns in the DataFrame which you want to include in the area plot
    title - the title of the chart
    yaxisTitle - the yaxis title of the chart (the xaxis title comes from the time variable)
    """
    PCcols = []
    traces = []
    
    stackedAreaDF = df.loc[:, ([time] + cols)]
    stackedAreaDF.fillna(0, inplace=True)
        
    stackedAreaDF['Total'] = stackedAreaDF[cols].sum(axis =1)
    
    for col in cols:
        stackedAreaDF["pc_"+str(col)] = stackedAreaDF[col] / stackedAreaDF['Total']
        PCcols.append("pc_"+str(col))
        
    stackedPCAreaDF = stackedAreaDF[PCcols].cumsum(axis=1)
    stackedPCAreaDF[time] = stackedAreaDF[time]

    for col in PCcols:
        traces.append({'type' : 'scatter',
                      'x' : stackedPCAreaDF[time],
                      'y' : stackedPCAreaDF[col],
                      'name' : col[3:-6],
                      'mode' : 'lines',
                      'fill' : 'tonexty'})
    
    data = Data(traces)
    layout = {'title' : title,
             'xaxis' : {'title' : time},
             'yaxis' : {'title' : yaxisTitle}}
    fig = Figure(data = data, layout = layout)
    pyo.iplot(fig)
    
test = createStackedPropArea(emissions, 'Year', ['United Arab Emirates | ARE','United Kingdom | GBR', 
                   'United States | USA','China | CHN', 'India | IND',],
                            "Proportion of Co2 Emissions, 1960-2015", 'Proprtion of Co2 Emissions')
In [80]:
createStackedPropArea(emissions, 'Year', ['China | CHN', 
                                          'United States | USA', 
                                          'India | IND',
                                          'United Arab Emirates | ARE',
                                          'United Kingdom | GBR', 
                                          ],
                      "Proportion of Co2 Emissions, 1960-2015", 'Proprtion of Co2 Emissions')
In [81]:
import random
countries = []
for i in range(10):
    countries.append(random.choice(emissions.columns.tolist()))
countries
Out[81]:
['Kuwait | KWT',
 'Antigua and Barbuda | ATG',
 'Central African Republic | CAF',
 'Aruba | ABW',
 'Sao Tome and Principe | STP',
 'Croatia | HRV',
 'New Zealand | NZL',
 'Gambia, The | GMB',
 'Low income | LIC',
 'Canada | CAN']
In [82]:
createStackedPropArea(emissions, 'Year', countries,  "Proportion of Co2 Emissions, 1960-2015", 'Proprtion of Co2 Emissions')

Styling your Charts

Chart-Presentation-1

In [83]:
stocks = py.get_figure("https://plot.ly/~rmuir/162/stock-closing-prices-for-apple-in-2012/")
stocks = py.get_figure('rmuir', 162)
stocks
In [84]:
pyo.iplot(stocks)

Changing the range

Each of 'xaxis' and 'yaxis' within the layout object have their own 'range' attribute which we can change. We set the range for each axis as a list of two numbers (float or integer). These number correspond to the minimum and maximum values respectively:

layout = {'xaxis' : {'range' : [, ]}, 'yaxis' : {'range' : [, ]} }

In [85]:
stocks['layout']['yaxis'].update({'range' : [0, 1000]})
pyo.iplot(stocks)
In [86]:
maximum = max(stocks['data'][0]['y'])
maximum# Passing max + 5% value 

stocks['layout']['yaxis'].update({'range' : [0, maximum * 1.05]})
pyo.iplot(stocks)

Setting the tickformat

The 'tickformat' option is contained within each of the x- and y-axis objects within the layout:

layout = {'xaxis' : {'tickformat' : }, 'yaxis' : {'tickformat' : }}

To change the tick format, we can pass different format string as the value for this option.

Here are some common string format values we can pass: Percentage with 2 decimal places: ".2%" Percentage with 0 decimal places: ".0%" Currency with 2 decimal places: "$.2f" Currency with 0 decimal places: "$.0" Currency with 0 decimal places and thousand separator: "$,"

In [87]:
stocks['layout']['yaxis'].update({'tickformat' : '.2%'})
pyo.iplot(stocks)
In [88]:
stocks['layout']['yaxis'].update({'tickformat' : '$.2f'})
pyo.iplot(stocks)
In [89]:
stocks['layout']['yaxis'].update({'range' : [0, 1500], 'tickformat' : '$,'})
pyo.iplot(stocks)
In [90]:
stocks['layout'].update({'yaxis' : {'range' : [0, max(stocks['data'][0]['y']) * 1.05],
                                    'title' : 'Closing Price', 'tickformat' : "$.0"}})
pyo.iplot(stocks)

Plotly has a builtin datetime formatter.

Datetime object is made of two parts; a date and a time. Plotly does not recognise a time without a date. A date object: 31/12/95 - 31st December 1995 A time object: 12:35:59.99 - Almost 12:36pm (This is not valid on its own) A datetime object: 31/12/95 12:35:59.99 - Almost 12:36pm on 31st December 1995

Common date formats:

UK date: "%d/%m/%y" UK date with 4 digit year: "%d/%m/%Y" American date with hyphens: "%m-%d-%Y" Abbreviated weekday and month names: "%a %d %b %Y" Unabbreviated weekday and month names: "%A %d %B %Y"

Common time formats:

Datetime objects have a default time of 00:00:00.00: 24 hour clock with microseconds: "%H:%M:%S.%f" 12 hour clock: "%H%p %M:%S" If you have date and time data that you need to display, you can combined these formats: American date with hyphens and 24 hour clock: "%m-%d-%Y %H:%M:%S"

In [91]:
def updateDT(dt):
    stocks['layout'].update({'xaxis' : {'tickformat' : dt}})
    pyo.iplot(stocks)
In [92]:
updateDT("%d/%m/%y")
In [93]:
updateDT("%m-%d-%Y")
In [94]:
updateDT("%a %d %b %Y")
In [95]:
updateDT("%b %Y")

Modifying tickvalues

Pie Chart

Intro

A pie chart is a circular chart which is used to display categorical data where the different categories comprise parts of a whole. The area of the circle is divided into segments that each represent a proportion of a whole.

A pie chart should be used when there are very few categories; any more than 6 and it will become difficult to read. They should also only be used when there is a large distinction between one of the categories.

In [99]:
ethnicity = pd.read_csv("http://www.richard-muir.com/data/public/csv/UKStudentsEthnicity.csv",
                        index_col = 0, header=None, names=['N'])
ethnicity
Out[99]:
N
White 1418685.0
Other 84525.0
Not known 33290.0
Black 117460.0
Asian 175240.0

Pie Chart Basic

To make a pie chart with Plotly, we only need to pass three parameters to our trace; 'labels', 'values' and 'type':

  1. 'labels' is a list of the categories that we're plotting
  2. 'values' is a list of the number of things in that category
  3. 'type' should be set to 'pie' to tell Plotly that we're making a pie chart The lists for 'labels' and 'values' must be the same length.
In [167]:
pieTrace = {'type' : 'pie',
           'labels' : ethnicity.index,
           'values' : ethnicity['N']}

data = [pieTrace]

layout = {'title' : 'Ethnicity of students in the UK'}

fig = {'data' : data, 'layout' : layout}

pyo.iplot(fig)

Sorting largest to smallest: 'sort': helps is sorting pie chart

In [101]:
fig['data'][0].update({'sort' : False})
pyo.iplot(fig)
In [103]:
pieTrace = {'type' : 'pie',
           'labels' : ethnicity.index,
           'values' : ethnicity['N'],
           'sort' : True}

data = [pieTrace]

layout = {'title' : 'Ethnicity of students in the UK'}

fig = {'data' : data, 'layout' : layout}

pyo.iplot(fig)

'Direction': Parameters helps in defyining direction of sort. 'counterclockwise'or 'clockwise'

In [104]:
fig['data'][0].update({'direction' : 'clockwise',
                      'sort' : True})
pyo.iplot(fig)

Styling the pie chart

In [105]:
ethPie = py.get_figure("rmuir", 263)
pyo.iplot(ethPie)

Change the colour of the individual segments

In [106]:
ethPie['data'][0].update({'marker' : {'colors' : ["rgb(12,192,170)", 
                                                  "rgb(190,252,250)", 
                                                  "rgb(77,194,84)", 
                                                  "rgb(211,238,128)", 
                                                  "rgb(97,167,193)"]}})

pyo.iplot(ethPie)

Change the width and colour of the line that surrounds each segment

In [112]:
ethPie['data'][0]['marker'].update({'line' : {'color' : '#333',
                                              'width' : [1,1,3,1,1]}})
pyo.iplot(ethPie)

Highlighting specific segments

In [114]:
ethPie['data'][0].update({'pull' : 0.1})
pyo.iplot(ethPie)

Moving Specific Segment

In [115]:
ethPie['data'][0].update({'pull' : [0, 0, 0.2, 0, 0]})
pyo.iplot(ethPie)

Labels, text & hoverinfo

Info

In [122]:
level = pd.read_csv("http://richard-muir.com/data/public/csv/StudentsByLevelAndYear.csv", index_col = 0)
level
Out[122]:
2005/06 2006/07 2007/08 2008/09 2009/10 2010/11 2011/12 2012/13 2013/14 2014/15 2015/16
Postgraduate part-time 114940 116220 116570 129055 132790 127750 109535 102890 106260 107950 107120
Postgraduate full-time 155665 162575 161015 177595 200880 207595 207665 203155 211875 209805 210945
Undergraduate part-time 337240 341035 332320 344775 334820 301025 278530 199940 175375 157835 148570
Undergraduate full-time 450485 437775 458575 493425 516770 509065 521605 466270 502230 513295 525490
In [117]:
level = level[['2015/16']]
level.sort_values(by='2015/16', ascending = False, inplace = True)
level
C:\Users\saurabhkumar9\AppData\Local\Continuum\anaconda2\envs\spyder\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

Out[117]:
2015/16
Undergraduate full-time 525490
Postgraduate full-time 210945
Undergraduate part-time 148570
Postgraduate part-time 107120
In [118]:
fig = {'data' : [{'type' : 'pie',
                  'name' : "Students by level of study",
                 'labels' : level.index,
                 'values' : level['2015/16'],
                 'direction' : 'clockwise',
                 'marker' : {'colors' : ["rgb(183,101,184)", "rgb(236,77,216)", "rgb(176,164,216)", "rgb(255,168,255)"]}}],
      'layout' : {'title' : 'Students by level of study in 2015-16'}}

pyo.iplot(fig)

Different text options on pie charts

Pie charts also have a parameter called 'textinfo'. This determines which trace information appears written on the pie chart. 'textinfo' can take any of the following values, joined with a '+':

'label' - displays the label on the segment
'text' - displays the text on the segment (this can be set separately to the label)
'value' - displays the value passed into the trace
'percent' - displayed the computer percentage

We can see that the default is to only show 'percent'.

In [119]:
fig['data'][0].update({'text' : ['Undergrad FT',' Postgrad FT','Undergrad PT','Postgrad PT'],
                       'textinfo' : 'label+text+value+percent'})

pyo.iplot(fig)
In [120]:
# removing label
fig['data'][0].update({'textinfo' : 'text+value+percent',
                      'showlegend' : False})
pyo.iplot(fig)

Hoverinfo

In [121]:
# Information can be shown usinh hover info. Here we are displaying label.
fig['data'][0].update({'hoverinfo' : 'label'})
pyo.iplot(fig)

Formatting text

In [124]:
level = py.get_figure("rmuir", 269)
pyo.iplot(level)
In [125]:
level['data'][0].update({'textposition' : 'none'}) # Removes the text
pyo.iplot(level)
In [126]:
level['data'][0].update({'textposition' : 'outside'})
pyo.iplot(level)
In [127]:
level['data'][0].update({'textposition' : 'inside'})
pyo.iplot(level)
In [129]:
# Styling Text
pieTrace = {'type' : 'pie',
           'labels' : ethnicity.index,
           'values' : ethnicity['N'],
           'marker' : {'colors' : ["rgb(12,192,170)", 
                                                  "rgb(190,252,250)", 
                                                  "rgb(77,194,84)", 
                                                  "rgb(211,238,128)", 
                                                  "rgb(97,167,193)"]},
           'pull' : [0, 0, 0.2, 0, 0]
           }

data = [pieTrace]

layout = {'title' : 'Ethnicity of students in the UK'}

fig = {'data' : data, 'layout' : layout}

pyo.iplot(fig)

Text can be set separately using 'outsidetextfont' and 'insidetextfont'

In [131]:
fig['data'][0].update({'outsidetextfont' : {'size' : 16},
                      'insidetextfont' : {'color' : 'black',
                                         'size' : 13}})
pyo.iplot(fig)

Using Pie Chart in a sub plot

Making a subplots object

In [155]:
from plotly.tools import make_subplots
sub = make_subplots(rows = 2, cols = 2)
print(sub)
Figure({
    'data': [],
    'layout': {'template': '...',
               'xaxis': {'anchor': 'y', 'domain': [0.0, 0.45]},
               'xaxis2': {'anchor': 'y2', 'domain': [0.55, 1.0]},
               'xaxis3': {'anchor': 'y3', 'domain': [0.0, 0.45]},
               'xaxis4': {'anchor': 'y4', 'domain': [0.55, 1.0]},
               'yaxis': {'anchor': 'x', 'domain': [0.575, 1.0]},
               'yaxis2': {'anchor': 'x2', 'domain': [0.575, 1.0]},
               'yaxis3': {'anchor': 'x3', 'domain': [0.0, 0.425]},
               'yaxis4': {'anchor': 'x4', 'domain': [0.0, 0.425]}}
})

This is the format of your plot grid: [ (1,1) x1,y1 ] [ (1,2) x2,y2 ] [ (2,1) x3,y3 ] [ (2,2) x4,y4 ]

Subplot Scatter

Simple Subplot
In [172]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(rows=3, cols=1)

fig.append_trace(go.Scatter(
    x=[3, 4, 5],
    y=[1000, 1100, 1200],
), row=1, col=1)

fig.append_trace(go.Scatter(
    x=[2, 3, 4],
    y=[100, 110, 120],
), row=2, col=1)

fig.append_trace(go.Scatter(
    x=[0, 1, 2],
    y=[10, 11, 12]
), row=3, col=1)

fig.update_layout(height=600, width=600, title_text="Stacked subplots")
fig.show()
Stacked Subplot
In [174]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(rows=3, cols=1)

fig.append_trace(go.Scatter(
    x=[3, 4, 5],
    y=[1000, 1100, 1200],
), row=1, col=1)

fig.append_trace(go.Scatter(
    x=[2, 3, 4],
    y=[100, 110, 120],
), row=2, col=1)

fig.append_trace(go.Scatter(
    x=[0, 1, 2],
    y=[10, 11, 12]
), row=3, col=1)


fig.update_layout(height=600, width=600, title_text="Stacked subplots")
fig.show()
Multiple Subplot
In [175]:
from plotly.subplots import make_subplots

fig = make_subplots(rows=2, cols=2, start_cell="bottom-left")

fig.add_trace(go.Scatter(x=[1, 2, 3], y=[4, 5, 6]),
              row=1, col=1)

fig.add_trace(go.Scatter(x=[20, 30, 40], y=[50, 60, 70]),
              row=1, col=2)

fig.add_trace(go.Scatter(x=[300, 400, 500], y=[600, 700, 800]),
              row=2, col=1)

fig.add_trace(go.Scatter(x=[4000, 5000, 6000], y=[7000, 8000, 9000]),
              row=2, col=2)

fig.show()
Multiple Subplot with Titles
In [176]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=("Plot 1", "Plot 2", "Plot 3", "Plot 4"))

fig.add_trace(go.Scatter(x=[1, 2, 3], y=[4, 5, 6]),
              row=1, col=1)

fig.add_trace(go.Scatter(x=[20, 30, 40], y=[50, 60, 70]),
              row=1, col=2)

fig.add_trace(go.Scatter(x=[300, 400, 500], y=[600, 700, 800]),
              row=2, col=1)

fig.add_trace(go.Scatter(x=[4000, 5000, 6000], y=[7000, 8000, 9000]),
              row=2, col=2)

fig.update_layout(height=500, width=700,
                  title_text="Multiple Subplots with Titles")

fig.show()
Simple Subplot with Annotations
In [177]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(rows=1, cols=2)

fig.add_trace(
    go.Scatter(
        x=[1, 2, 3],
        y=[4, 5, 6],
        mode="markers+text",
        text=["Text A", "Text B", "Text C"],
        textposition="bottom center"
    ),
    row=1, col=1
)

fig.add_trace(
    go.Scatter(
        x=[20, 30, 40],
        y=[50, 60, 70],
        mode="markers+text",
        text=["Text D", "Text E", "Text F"],
        textposition="bottom center"
    ),
    row=1, col=2
)

fig.update_layout(height=600, width=800, title_text="Annotations and subplots")

fig.show()
Side by Side Subplot
In [178]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

fig = make_subplots(rows=1, cols=2, column_widths=[0.7, 0.3])

fig.add_trace(go.Scatter(x=[1, 2, 3], y=[4, 5, 6]),
              row=1, col=1)

fig.add_trace(go.Scatter(x=[20, 30, 40], y=[50, 60, 70]),
              row=1, col=2)

fig.show()
Customizing subplot axis
In [179]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Initialize figure with subplots
fig = make_subplots(
    rows=2, cols=2, subplot_titles=("Plot 1", "Plot 2", "Plot 3", "Plot 4")
)

# Add traces
fig.add_trace(go.Scatter(x=[1, 2, 3], y=[4, 5, 6]), row=1, col=1)
fig.add_trace(go.Scatter(x=[20, 30, 40], y=[50, 60, 70]), row=1, col=2)
fig.add_trace(go.Scatter(x=[300, 400, 500], y=[600, 700, 800]), row=2, col=1)
fig.add_trace(go.Scatter(x=[4000, 5000, 6000], y=[7000, 8000, 9000]), row=2, col=2)

# Update xaxis properties
fig.update_xaxes(title_text="xaxis 1 title", row=1, col=1)
fig.update_xaxes(title_text="xaxis 2 title", range=[10, 50], row=1, col=2)
fig.update_xaxes(title_text="xaxis 3 title", showgrid=False, row=2, col=1)
fig.update_xaxes(title_text="xaxis 4 title", type="log", row=2, col=2)

# Update yaxis properties
fig.update_yaxes(title_text="yaxis 1 title", row=1, col=1)
fig.update_yaxes(title_text="yaxis 2 title", range=[40, 80], row=1, col=2)
fig.update_yaxes(title_text="yaxis 3 title", showgrid=False, row=2, col=1)
fig.update_yaxes(title_text="yaxis 4 title", row=2, col=2)

# Update title and height
fig.update_layout(title_text="Customizing Subplot Axes", height=700)

fig.show()
Subplots with Shared X-Axes
In [180]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(
    rows=3, cols=1, shared_xaxes=True, vertical_spacing=0.02
)

fig.add_trace(go.Scatter(x=[0, 1, 2], y=[10, 11, 12]),
              row=3, col=1)

fig.add_trace(go.Scatter(x=[2, 3, 4], y=[100, 110, 120]),
              row=2, col=1)

fig.add_trace(go.Scatter(x=[3, 4, 5], y=[1000, 1100, 1200]),
              row=1, col=1)

fig.update_layout(height=600, width=600,
                  title_text="Stacked Subplots with Shared X-Axes")
fig.show()
Subplots with Shared Y-Axes
In [181]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(rows=2, cols=2, shared_yaxes=True)

fig.add_trace(go.Scatter(x=[1, 2, 3], y=[2, 3, 4]),
              row=1, col=1)

fig.add_trace(go.Scatter(x=[20, 30, 40], y=[5, 5, 5]),
              row=1, col=2)

fig.add_trace(go.Scatter(x=[2, 3, 4], y=[600, 700, 800]),
              row=2, col=1)

fig.add_trace(go.Scatter(x=[4000, 5000, 6000], y=[7000, 8000, 9000]),
              row=2, col=2)

fig.update_layout(height=600, width=600,
                  title_text="Multiple Subplots with Shared Y-Axes")
fig.show()
Custom Sized Subplot with Subplot Titles

The colspan subplot option specifies the number of grid columns that the subplot starting in the given cell should occupy. If unspecified, colspan defaults to 1.

Here is an example that creates a 2 by 2 subplot grid containing 3 subplots. The subplot specs element for position (2, 1) has a colspan value of 2, causing it to span the full figure width. The subplot specs element for position (2, 2) is None because no subplot begins at this location in the grid.

In [182]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(
    rows=2, cols=2,
    specs=[[{}, {}],
           [{"colspan": 2}, None]],
    subplot_titles=("First Subplot","Second Subplot", "Third Subplot"))

fig.add_trace(go.Scatter(x=[1, 2], y=[1, 2]),
                 row=1, col=1)

fig.add_trace(go.Scatter(x=[1, 2], y=[1, 2]),
                 row=1, col=2)
fig.add_trace(go.Scatter(x=[1, 2, 3], y=[2, 1, 2]),
                 row=2, col=1)

fig.update_layout(showlegend=False, title_text="Specs with Subplot Title")
fig.show()
Multiple Custom Sized Subplots

rowspan and colspan subplot options to create a custom subplot layout with subplots of mixed sizes

In [183]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(
    rows=5, cols=2,
    specs=[[{}, {"rowspan": 2}],
           [{}, None],
           [{"rowspan": 2, "colspan": 2}, None],
           [None, None],
           [{}, {}]],
    print_grid=True)

fig.add_trace(go.Scatter(x=[1, 2], y=[1, 2], name="(1,1)"),
              row=1, col=1)
fig.add_trace(go.Scatter(x=[1, 2], y=[1, 2], name="(1,2)"), row=1, col=2)
fig.add_trace(go.Scatter(x=[1, 2], y=[1, 2], name="(2,1)"), row=2, col=1)
fig.add_trace(go.Scatter(x=[1, 2], y=[1, 2], name="(3,1)"), row=3, col=1)
fig.add_trace(go.Scatter(x=[1, 2], y=[1, 2], name="(5,1)"), row=5, col=1)
fig.add_trace(go.Scatter(x=[1, 2], y=[1, 2], name="(5,2)"), row=5, col=2)

fig.update_layout(height=600, width=600, title_text="specs examples")
fig.show()
This is the format of your plot grid:
[ (1,1) x,y   ]  ⎡ (1,2) x2,y2 ⎤
[ (2,1) x3,y3 ]  ⎣      :      ⎦
⎡ (3,1) x4,y4           -      ⎤
⎣      :                :      ⎦
[ (5,1) x5,y5 ]  [ (5,2) x6,y6 ]

In [ ]:
Subplots Types

By default, the make_subplots function assumes that the traces that will be added to all subplots are 2-dimensional cartesian traces (e.g. scatter, bar, histogram, violin, etc.). Traces with other subplot types (e.g. scatterpolar, scattergeo, parcoords, etc.) are supporteed by specifying the type subplot option in the specs argument to make_subplots. Here are the possible values for the type option:

  1. "xy": 2D Cartesian subplot type for scatter, bar, etc. This is the default if no type is specified.
  2. "scene": 3D Cartesian subplot for scatter3d, cone, etc.
  3. "polar": Polar subplot for scatterpolar, barpolar, etc.
  4. "ternary": Ternary subplot for scatterternary.
  5. "mapbox": Mapbox subplot for scattermapbox.
  6. "domain": Subplot type for traces that are individually positioned. pie, parcoords, parcats, etc.

trace type: A trace type name (e.g. "bar", "scattergeo", "carpet", "mesh", etc.) which will be used to determine the appropriate subplot type for that trace.

In [184]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(
    rows=2, cols=2,
    specs=[[{"type": "xy"}, {"type": "polar"}],
           [{"type": "domain"}, {"type": "scene"}]],
)

fig.add_trace(go.Bar(y=[2, 3, 1]),
              row=1, col=1)

fig.add_trace(go.Barpolar(theta=[0, 45, 90], r=[2, 3, 1]),
              row=1, col=2)

fig.add_trace(go.Pie(values=[2, 3, 1]),
              row=2, col=1)

fig.add_trace(go.Scatter3d(x=[2, 3, 1], y=[0, 0, 0], z=[0.5, 1, 2], mode="lines"),
              row=2, col=2)

fig.update_layout(height=700, showlegend=False)

fig.show()
In [187]:
import plotly.graph_objects as go

fig = make_subplots(
    rows=2, cols=2,
    specs=[[{"type": "bar"}, {"type": "barpolar"}],
           [{"type": "pie"}, {"type": "scatter3d"}]],
)

fig.add_trace(go.Bar(y=[2, 3, 1]),
              row=1, col=1)

fig.add_trace(go.Barpolar(theta=[0, 45, 90], r=[2, 3, 1]),
              row=1, col=2)

fig.add_trace(go.Pie(values=[2, 3, 1]),
              row=2, col=1)

fig.add_trace(go.Scatter3d(x=[2, 3, 1], y=[0, 0, 0], z=[0.5, 1, 2], mode="lines"),
              row=2, col=2)

fig.update_layout(height=700, showlegend=False)

fig.show()
Side by Side Subplot (low-level API)
In [188]:
import plotly.graph_objects as go

trace1 = go.Scatter(
    x=[1, 2, 3],
    y=[2, 3, 4]
)
trace2 = go.Scatter(
    x=[20, 30, 40],
    y=[5, 5, 5],
    xaxis="x2",
    yaxis="y"
)
trace3 = go.Scatter(
    x=[2, 3, 4],
    y=[600, 700, 800],
    xaxis="x",
    yaxis="y3"
)
trace4 = go.Scatter(
    x=[4000, 5000, 6000],
    y=[7000, 8000, 9000],
    xaxis="x4",
    yaxis="y4"
)
data = [trace1, trace2, trace3, trace4]
layout = go.Layout(
    xaxis=dict(
        domain=[0, 0.45]
    ),
    yaxis=dict(
        domain=[0, 0.45]
    ),
    xaxis2=dict(
        domain=[0.55, 1]
    ),
    xaxis4=dict(
        domain=[0.55, 1],
        anchor="y4"
    ),
    yaxis3=dict(
        domain=[0.55, 1]
    ),
    yaxis4=dict(
        domain=[0.55, 1],
        anchor="x4"
    )
)
fig = go.Figure(data=data, layout=layout)
fig.show()
Stacked Subplots with a Shared X-Axis (low-level API)
In [189]:
import plotly.graph_objects as go

trace1 = go.Scatter(
    x=[0, 1, 2],
    y=[10, 11, 12]
)
trace2 = go.Scatter(
    x=[2, 3, 4],
    y=[100, 110, 120],
    yaxis="y2"
)
trace3 = go.Scatter(
    x=[3, 4, 5],
    y=[1000, 1100, 1200],
    yaxis="y3"
)
data = [trace1, trace2, trace3]
layout = go.Layout(
    yaxis=dict(
        domain=[0, 0.33]
    ),
    legend=dict(
        traceorder="reversed"
    ),
    yaxis2=dict(
        domain=[0.33, 0.66]
    ),
    yaxis3=dict(
        domain=[0.66, 1]
    )
)
fig = go.Figure(data=data, layout=layout)
fig.show()

Subplot Pie Chart

In [173]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

labels = ["US", "China", "European Union", "Russian Federation", "Brazil", "India",
          "Rest of World"]

# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]])
fig.add_trace(go.Pie(labels=labels, values=[16, 15, 12, 6, 5, 4, 42], name="GHG Emissions"),
              1, 1)
fig.add_trace(go.Pie(labels=labels, values=[27, 11, 25, 8, 1, 3, 25], name="CO2 Emissions"),
              1, 2)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.4, hoverinfo="label+percent+name")

fig.update_layout(
    title_text="Global Emissions 1990-2011",
    # Add annotations in the center of the donut pies.
    annotations=[dict(text='GHG', x=0.18, y=0.5, font_size=20, showarrow=False),
                 dict(text='CO2', x=0.82, y=0.5, font_size=20, showarrow=False)])
fig.show()

Donut Plot

Donut charts can be used when the information that we need to convey is very simple and the differences between the categories are stark. They can be used when seeing the general trend in the data is more important than knowing the exact figures.

Donut charts are most useful when you need to display contextual information about your data, and that contextual information must be inextricably linked to that bit of data - by using a donut chart, you can put the information in the hole.

In [190]:
outcomes = pd.read_csv("http://richard-muir.com/data/public/csv/StudentOutcomes201415BySubjectArea.csv", index_col = 0)
outcomes
Out[190]:
Work Work and further study Further study Unemployed Other
Medicine & dentistry 6855.0 135.0 345.0 15.0 35.0
Subjects allied to medicine 22135.0 935.0 1825.0 620.0 630.0
Biological sciences 15725.0 1800.0 6045.0 1520.0 1335.0
Veterinary science 550.0 10.0 20.0 5.0 10.0
Agriculture & related subjects 1265.0 120.0 195.0 120.0 145.0
Physical sciences 6510.0 555.0 3410.0 920.0 620.0
Mathematical sciences 2910.0 360.0 1230.0 385.0 265.0
Computer science 7220.0 225.0 810.0 925.0 310.0
Engineering & technology 9420.0 405.0 1740.0 1010.0 580.0
Architecture, building & planning 3345.0 230.0 380.0 270.0 170.0
Total - Science subject areas 75930.0 4770.0 15995.0 5780.0 4110.0
Social studies 14925.0 1355.0 3795.0 1465.0 1380.0
Law 4470.0 995.0 2870.0 485.0 425.0
Business & administrative studies 20150.0 1500.0 2295.0 1790.0 1375.0
Mass communications & documentation 5210.0 165.0 455.0 520.0 355.0
Languages 9340.0 940.0 3165.0 975.0 870.0
Historical & philosophical studies 6140.0 715.0 2640.0 705.0 635.0
Creative arts & design 19925.0 920.0 2170.0 1865.0 1290.0
Education 9590.0 370.0 1485.0 285.0 400.0
Combined 245.0 40.0 80.0 25.0 35.0
Total 165930.0 11765.0 34950.0 13900.0 10875.0
In [197]:
fig = {'data' : [{'type' : 'pie',
                  'labels' : outcomes.columns.tolist(),
                  'values' : outcomes.loc['Medicine & dentistry'],
                  'name' : 'Medicine & dentistry',
                  'direction' : 'clockwise',
                  'hole' : 0.5}],
       'layout' : {'title' : 'Outcomes for medicine and dentistry students'}}

pyo.iplot(fig)
In [198]:
info = "Medicine & Dentistry students are more likely to be employed than students from any other subject area"

fig['layout'].update({'annotations' : [{'text' : info,
                                       'xref' : 'paper',
                                       'yref' : 'paper',
                                       'x' : 0.5,
                                       'y' : 0.5,
                                       'showarrow' : False}]})

pyo.iplot(fig)
In [199]:
# Adding Line Breaks
info = "<b>Medicine &<br>Dentistry students<br>are more likely to<br>be employed than<br>students from any<br>other subject<br>area</b>"

fig['layout'].update({'annotations' : [{'text' : info,
                                       'xref' : 'paper',
                                       'yref' : 'paper',
                                       'x' : 0.5,
                                       'y' : 0.5,
                                       'showarrow' : False,
                                       'font' : {'size' : 16}}]})

pyo.iplot(fig)
In [200]:
fig['data'][0].update({'hole' : 0.55})
pyo.iplot(fig)

SunBurst Chart

Sunburst plots visualize hierarchical data spanning outwards radially from root to leaves. The sunburst sector hierarchy is determined by the entries in labels (names in px.sunburst) and in parents. The root starts from the center and children are added to the outer rings.

Main arguments:

  1. labels (names in px.sunburst since labels is reserved for overriding columns names): sets the labels of sunburst sectors.
  2. parents: sets the parent sectors of sunburst sectors. An empty string '' is used for the root node in the hierarchy. In this example, the root is "Eve".
  3. values: sets the values associated with sunburst sectors, determining their width (See the branchvalues section below for different modes for setting the width).

Basic Plot

In [201]:
# Using plotly express
import plotly.express as px
data = dict(
    character=["Eve", "Cain", "Seth", "Enos", "Noam", "Abel", "Awan", "Enoch", "Azura"],
    parent=["", "Eve", "Eve", "Seth", "Seth", "Eve", "Eve", "Awan", "Eve" ],
    value=[10, 14, 12, 10, 2, 6, 6, 4, 4])

fig =px.sunburst(
    data,
    names='character',
    parents='parent',
    values='value',
)
fig.show()
In [202]:
# Using go sunburn
import plotly.graph_objects as go

fig =go.Figure(go.Sunburst(
    labels=["Eve", "Cain", "Seth", "Enos", "Noam", "Abel", "Awan", "Enoch", "Azura"],
    parents=["", "Eve", "Eve", "Seth", "Seth", "Eve", "Eve", "Awan", "Eve" ],
    values=[10, 14, 12, 10, 2, 6, 6, 4, 4],
))
# Update layout for tight margin
# See https://plot.ly/python/creating-and-updating-figures/
fig.update_layout(margin = dict(t=0, l=0, r=0, b=0))

fig.show()

Sunburst with Repeated Labels

In [203]:
fig =go.Figure(go.Sunburst(
 ids=[
    "North America", "Europe", "Australia", "North America - Football", "Soccer",
    "North America - Rugby", "Europe - Football", "Rugby",
    "Europe - American Football","Australia - Football", "Association",
    "Australian Rules", "Autstralia - American Football", "Australia - Rugby",
    "Rugby League", "Rugby Union"
  ],
  labels= [
    "North<br>America", "Europe", "Australia", "Football", "Soccer", "Rugby",
    "Football", "Rugby", "American<br>Football", "Football", "Association",
    "Australian<br>Rules", "American<br>Football", "Rugby", "Rugby<br>League",
    "Rugby<br>Union"
  ],
  parents=[
    "", "", "", "North America", "North America", "North America", "Europe",
    "Europe", "Europe","Australia", "Australia - Football", "Australia - Football",
    "Australia - Football", "Australia - Football", "Australia - Rugby",
    "Australia - Rugby"
  ],
))
fig.update_layout(margin = dict(t=0, l=0, r=0, b=0))

fig.show()

Branchvalues

In [204]:
import plotly.graph_objects as go

fig =go.Figure(go.Sunburst(
    labels=[ "Eve", "Cain", "Seth", "Enos", "Noam", "Abel", "Awan", "Enoch", "Azura"],
    parents=["",    "Eve",  "Eve",  "Seth", "Seth", "Eve",  "Eve",  "Awan",  "Eve" ],
    values=[  65,    14,     12,     10,     2,      6,      6,      4,       4],
    branchvalues="total",
))
fig.update_layout(margin = dict(t=0, l=0, r=0, b=0))

fig.show()

Large Number of Slices

In [205]:
import plotly.graph_objects as go
import pandas as pd

df1 = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/718417069ead87650b90472464c7565dc8c2cb1c/sunburst-coffee-flavors-complete.csv')
df2 = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/718417069ead87650b90472464c7565dc8c2cb1c/coffee-flavors.csv')

fig = go.Figure()

fig.add_trace(go.Sunburst(
    ids=df1.ids,
    labels=df1.labels,
    parents=df1.parents,
    domain=dict(column=0)
))

fig.add_trace(go.Sunburst(
    ids=df2.ids,
    labels=df2.labels,
    parents=df2.parents,
    domain=dict(column=1),
    maxdepth=2
))

fig.update_layout(
    grid= dict(columns=2, rows=1),
    margin = dict(t=0, l=0, r=0, b=0)
)

fig.show()

Sunburst chart with a continuous colorscale

The example below visualizes a breakdown of sales (corresponding to sector width) and call success rate (corresponding to sector color) by region, county and salesperson level. For example, when exploring the data you can see that although the East region is behaving poorly, the Tyler county is still above average -- however, its performance is reduced by the poor success rate of salesperson GT.

In the right subplot which has a maxdepth of two levels, click on a sector to see its breakdown to lower levels.

In [206]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd

df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/sales_success.csv')
print(df.head())

levels = ['salesperson', 'county', 'region'] # levels used for the hierarchical chart
color_columns = ['sales', 'calls']
value_column = 'calls'

def build_hierarchical_dataframe(df, levels, value_column, color_columns=None):
    """
    Build a hierarchy of levels for Sunburst or Treemap charts.

    Levels are given starting from the bottom to the top of the hierarchy, 
    ie the last level corresponds to the root.
    """
    df_all_trees = pd.DataFrame(columns=['id', 'parent', 'value', 'color'])
    for i, level in enumerate(levels):
        df_tree = pd.DataFrame(columns=['id', 'parent', 'value', 'color'])
        dfg = df.groupby(levels[i:]).sum(numerical_only=True)
        dfg = dfg.reset_index()
        df_tree['id'] = dfg[level].copy()
        if i < len(levels) - 1:
            df_tree['parent'] = dfg[levels[i+1]].copy()
        else:
            df_tree['parent'] = 'total'
        df_tree['value'] = dfg[value_column]
        df_tree['color'] = dfg[color_columns[0]] / dfg[color_columns[1]]
        df_all_trees = df_all_trees.append(df_tree, ignore_index=True)
    total = pd.Series(dict(id='total', parent='', 
                              value=df[value_column].sum(),
                              color=df[color_columns[0]].sum() / df[color_columns[1]].sum()))
    df_all_trees = df_all_trees.append(total, ignore_index=True)
    return df_all_trees


df_all_trees = build_hierarchical_dataframe(df, levels, value_column, color_columns)
average_score = df['sales'].sum() / df['calls'].sum()

fig = make_subplots(1, 2, specs=[[{"type": "domain"}, {"type": "domain"}]],)
    
fig.add_trace(go.Sunburst(
    labels=df_all_trees['id'],
    parents=df_all_trees['parent'],
    values=df_all_trees['value'],
    branchvalues='total',
    marker=dict(
        colors=df_all_trees['color'],
        colorscale='RdBu',
        cmid=average_score),
    hovertemplate='<b>%{label} </b> <br> Sales: %{value}<br> Success rate: %{color:.2f}',
    name=''
    ), 1, 1)

fig.add_trace(go.Sunburst(
    labels=df_all_trees['id'],
    parents=df_all_trees['parent'],
    values=df_all_trees['value'],
    branchvalues='total',
    marker=dict(
        colors=df_all_trees['color'],
        colorscale='RdBu',
        cmid=average_score),
    hovertemplate='<b>%{label} </b> <br> Sales: %{value}<br> Success rate: %{color:.2f}',
    maxdepth=2
    ), 1, 2)

fig.update_layout(margin=dict(t=10, b=10, r=10, l=10))
fig.show()
   Unnamed: 0 region   county salesperson  calls  sales
0           0  North   Dallam          JE     35     23
1           1  North   Dallam          ZQ     49     13
2           2  North   Dallam          IJ     20      6
3           3  North  Hartley          WE     39     37
4           4  North  Hartley          PL     42     37

Bar Plot

Intro

Bar Chart: A barchart is used to display categorical data, with the size of the bar representing the quantity (or sometimes proportion) in that particular category. Barcharts make it easy to compare different categories because we can easily assess which bars are longer than others. Barcharts can be horizontal or vertical.

In [207]:
meteorite = pd.read_csv("http://richard-muir.com/data/public/csv/MeteoriteLandingsPerYear.csv", index_col = 0)
meteorite.head()
Out[207]:
count
year
1970 48
1971 49
1972 32
1973 31
1974 691
In [208]:
numberOfMeteorites = {'type' : 'bar',
                     'x' : meteorite.index,
                     'y' : meteorite['count']}

pyo.iplot([numberOfMeteorites])
In [209]:
# Adding layout
layout = {'title' : "Number of meteorites found per year",
         'xaxis' : {'title' : 'Year'},
         'yaxis' : {'title' : 'Number of meteorites'},
         'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
                          'font' : {'color' : 'grey',
                                   'size' : 10},
                          'xref' : 'paper',
                          'yref' : 'paper',
                          'x' : 0,
                          'y' : -0.2,
                          'showarrow' : False}]}
fig = {'data' : [numberOfMeteorites],
      'layout' : layout}
pyo.iplot(fig)

Styling options for bar charts

In [210]:
numberOfMeteorites = {'type' : 'bar',
                      'x' : meteorite.index,
                      'y' : meteorite['count']}

layout = {'title' : "Number of meteorites found per year",
         'xaxis' : {'title' : 'Year'},
         'yaxis' : {'title' : 'Number of meteorites'},
         'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
                          'font' : {'color' : 'grey',
                                   'size' : 10},
                          'xref' : 'paper',
                          'yref' : 'paper',
                          'x' : 0,
                          'y' : -0.2,
                          'showarrow' : False}]}
fig = {'data' : [numberOfMeteorites],
      'layout' : layout}
pyo.iplot(fig)
Changing the colour of the bars

access the 'color' property within 'marker' sub-dictionary

In [211]:
fig['data'][0].update({'marker' : {'color' : 'lightblue'}})
pyo.iplot(fig)
Styling the bars

We can also change different styling options for the bars. Adding a grey outline to each bar:

In [212]:
fig['data'][0]['marker'].update({'line' : {'color' : '#333',
                                          'width' : 2}})
pyo.iplot(fig)
Opacity
In [213]:
fig['data'][0].update({'opacity' : 0.5})
pyo.iplot(fig)
Styling individual bars
In [214]:
numberOfMeteorites = {'type' : 'bar',
                     'x' : meteorite.index,
                     'y' : meteorite['count'],
                     'marker' : {'color' : 'lightblue',
                                'line' : {'color' : '#333',
                                          'width' : 2}},
                     'opacity' : 0.5,}
                     

layout = {'title' : "Number of meteorites found per year",
         'xaxis' : {'title' : 'Year'},
         'yaxis' : {'title' : 'Number of meteorites'},
         'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
                          'font' : {'color' : 'grey',
                                   'size' : 10},
                          'xref' : 'paper',
                          'yref' : 'paper',
                          'x' : 0,
                          'y' : -0.2,
                          'showarrow' : False}]}
fig = {'data' : [numberOfMeteorites],
      'layout' : layout}
pyo.iplot(fig)
In [215]:
colours = ['lightblue' for x in meteorite.index ]
colours
Out[215]:
['lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue']
In [216]:
colours = ['lightblue' if x != 2003 else 'purple' for x in meteorite.index ]
colours
Out[216]:
['lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'purple',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue']
In [217]:
numberOfMeteorites = {'type' : 'bar',
                     'x' : meteorite.index,
                     'y' : meteorite['count'],
                     'marker' : {'color' : colours,
                                'line' : {'color' : '#333',
                                          'width' : 2}},
                     'opacity' : 0.5,}
                     

layout = {'title' : "Number of meteorites found per year",
         'xaxis' : {'title' : 'Year'},
         'yaxis' : {'title' : 'Number of meteorites'},
         'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
                          'font' : {'color' : 'grey',
                                   'size' : 10},
                          'xref' : 'paper',
                          'yref' : 'paper',
                          'x' : 0,
                          'y' : -0.2,
                          'showarrow' : False}]}
fig = {'data' : [numberOfMeteorites],
      'layout' : layout}
pyo.iplot(fig)
In [218]:
# Finally adding annotation

fig['layout']['annotations'].append({'text' : 'Comet Bradfield 12,000km from Earth',
                                       'x' : 2003, 
                                       'y' : 3323,
                                       'showarrow' : True})
pyo.iplot(fig)

Making horizontal barcharts

In [219]:
meteorite = pd.read_csv("http://richard-muir.com/data/public/csv/MeteoriteLandingsPerYear.csv", index_col = 0)
meteorite.head()
Out[219]:
count
year
1970 48
1971 49
1972 32
1973 31
1974 691
In [220]:
colours = ['lightblue' if x != 2003 else 'purple' for x in meteorite.index ]
numberOfMeteorites = {'type' : 'bar',
                     'x' : meteorite.index,
                     'y' : meteorite['count'],
                     'marker' : {'color' : colours,
                                'line' : {'color' : '#333',
                                          'width' : 2}},
                     'opacity' : 0.5,}
                     

layout = {'title' : "Number of meteorites found per year",
         'xaxis' : {'title' : 'Year'},
         'yaxis' : {'title' : 'Number of meteorites'},
         'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
                          'font' : {'color' : 'grey',
                                   'size' : 10},
                          'xref' : 'paper',
                          'yref' : 'paper',
                          'x' : 0,
                          'y' : -0.2,
                          'showarrow' : False},
                         {'text' : 'Comet Bradfield 12,000km from Earth',
                                       'x' : 2003, 
                                       'y' : 3323,
                                       'showarrow' : True}]}
fig = {'data' : [numberOfMeteorites],
      'layout' : layout}
pyo.iplot(fig)

Set the 'orientation' parameter in the trace to 'h' to tell Plotly that we want this trace to be for a horizontal barchart.

In [221]:
colours = ['lightblue' if x != 2003 else 'purple' for x in meteorite.index ]
numberOfMeteorites = {'type' : 'bar',
                      #NEW CODE GOES HERE
                      'orientation' : 'h',
                     'x' : meteorite.index,
                     'y' : meteorite['count'],
                     'marker' : {'color' : colours,
                                'line' : {'color' : '#333',
                                          'width' : 2}},
                     'opacity' : 0.5,}
                     

layout = {'title' : "Number of meteorites found per year",
         'xaxis' : {'title' : 'Year'},
         'yaxis' : {'title' : 'Number of meteorites'},
         'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
                          'font' : {'color' : 'grey',
                                   'size' : 10},
                          'xref' : 'paper',
                          'yref' : 'paper',
                          'x' : 0,
                          'y' : -0.2,
                          'showarrow' : False},
                         {'text' : 'Comet Bradfield 12,000km from Earth',
                                       'x' : 2003, 
                                       'y' : 3323,
                                       'showarrow' : True}]}
fig = {'data' : [numberOfMeteorites],
      'layout' : layout}
pyo.iplot(fig)

Above case we also need to swipe X and Y values

In [222]:
colours = ['lightblue' if x != 2003 else 'purple' for x in meteorite.index ]
numberOfMeteorites = {'type' : 'bar',
                      'orientation' : 'h',
                      #SWAP X- AND Y-COORDINATES HERE
                     'x' : meteorite['count'],
                     'y' : meteorite.index,
                     'marker' : {'color' : colours,
                                'line' : {'color' : '#333',
                                          'width' : 2}},
                     'opacity' : 0.5,}
                     

layout = {'title' : "Number of meteorites found per year",
         'xaxis' : {'title' : 'Number of meteorites'},
         'yaxis' : {'title' : 'Year'},
         'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
                          'font' : {'color' : 'grey',
                                   'size' : 10},
                          'xref' : 'paper',
                          'yref' : 'paper',
                          'x' : 0,
                          'y' : -0.2,
                          'showarrow' : False},
                         {'text' : 'Comet Bradfield 12,000km from Earth',
                          #SWAP X- AND Y-COORDINATES HERE
                            'x' : 3323, 
                            'y' : 2003,
                            'showarrow' : True}]}
fig = {'data' : [numberOfMeteorites],
      'layout' : layout}
pyo.iplot(fig)

Plotting multiple bar traces

In [223]:
meteorites = pd.read_csv("http://richard-muir.com/data/public/csv/MeteoritesByContinent.csv", index_col = 0)
meteorites.head()
Out[223]:
continent year count
0 Africa 2000 239
1 Africa 2001 87
2 Africa 2002 109
3 Africa 2003 30
4 Africa 2004 17
In [224]:
continents = list(meteorites['continent'].unique())
continents
Out[224]:
['Africa',
 'Antarctica',
 'Asia',
 'Australia',
 'Europe',
 'North America',
 'South America']
In [225]:
traces = []
for c in continents:
    traces.append({'type' : 'bar',
                  'name' : c,
                  'x' : meteorites.loc[meteorites['continent'] == c, 'year'],
                  'y' : meteorites.loc[meteorites['continent'] == c, 'count']})
    
layout = {'title' : "Meteorites found by continent, 2000 - 2012",
         'xaxis' : {'title' : 'Year'},
         'yaxis' : {'title' : 'Number of meteorites'},
         'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
                          'font' : {'color' : 'grey',
                                   'size' : 10},
                          'xref' : 'paper',
                          'yref' : 'paper',
                          'x' : 0,
                          'y' : -0.2,
                          'showarrow' : False}]}
fig = {'data' : traces, 'layout' : layout}
pyo.iplot(fig)
In [228]:
# Another way of grouping

years = list(meteorites['year'].unique())
years
Out[228]:
[2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012]
In [229]:
traces = []
for y in years:
    traces.append({'type' : 'bar',
                  'name' : y,
                   # CHANGE TO SPLIT BY YEAR
                  'x' : meteorites.loc[meteorites['year'] == y, 'continent'],
                  'y' : meteorites.loc[meteorites['year'] == y, 'count'],
                  'opacity' : 0.7})
    
layout = {'title' : "Meteorites found by continent, 2000 - 2012",
         'xaxis' : {'title' : 'Continent'},
         'yaxis' : {'title' : 'Number of meteorites'},
         'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
                          'font' : {'color' : 'grey',
                                   'size' : 10},
                          'xref' : 'paper',
                          'yref' : 'paper',
                          'x' : 0,
                          'y' : -0.2,
                          'showarrow' : False}]}

fig = {'data' : traces, 'layout' : layout}
pyo.iplot(fig)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-229-768022cf924c> in <module>
     20                           'showarrow' : False}]}
     21 fig = {'data' : traces, 'layout' : layout}
---> 22 pyo.iplot(fig)

~\AppData\Roaming\Python\Python37\site-packages\plotly\offline\offline.py in iplot(figure_or_data, show_link, link_text, validate, image, filename, image_width, image_height, config, auto_play, animation_opts)
    381 
    382     # Get figure
--> 383     figure = tools.return_figure_from_figure_or_data(figure_or_data, validate)
    384 
    385     # Handle image request

~\AppData\Roaming\Python\Python37\site-packages\plotly\tools.py in return_figure_from_figure_or_data(figure_or_data, validate_figure)
    551 
    552         try:
--> 553             figure = Figure(**figure).to_dict()
    554         except exceptions.PlotlyError as err:
    555             raise exceptions.PlotlyError(

~\AppData\Roaming\Python\Python37\site-packages\plotly\graph_objs\_figure.py in __init__(self, data, layout, frames, skip_invalid, **kwargs)
    606             is invalid AND skip_invalid is False
    607         """
--> 608         super(Figure, self).__init__(data, layout, frames, skip_invalid, **kwargs)
    609 
    610     def add_area(

~\AppData\Roaming\Python\Python37\site-packages\plotly\basedatatypes.py in __init__(self, data, layout_plotly, frames, skip_invalid, **kwargs)
    155 
    156         # ### Import traces ###
--> 157         data = self._data_validator.validate_coerce(data, skip_invalid=skip_invalid)
    158 
    159         # ### Save tuple of trace objects ###

~\AppData\Roaming\Python\Python37\site-packages\_plotly_utils\basevalidators.py in validate_coerce(self, v, skip_invalid)
   2654                     else:
   2655                         trace = self.class_map[trace_type](
-> 2656                             skip_invalid=skip_invalid, **v_copy
   2657                         )
   2658                         res.append(trace)

~\AppData\Roaming\Python\Python37\site-packages\plotly\graph_objs\__init__.py in __init__(self, arg, alignmentgroup, base, basesrc, cliponaxis, constraintext, customdata, customdatasrc, dx, dy, error_x, error_y, hoverinfo, hoverinfosrc, hoverlabel, hovertemplate, hovertemplatesrc, hovertext, hovertextsrc, ids, idssrc, insidetextanchor, insidetextfont, legendgroup, marker, meta, metasrc, name, offset, offsetgroup, offsetsrc, opacity, orientation, outsidetextfont, r, rsrc, selected, selectedpoints, showlegend, stream, t, text, textangle, textfont, textposition, textpositionsrc, textsrc, texttemplate, texttemplatesrc, tsrc, uid, uirevision, unselected, visible, width, widthsrc, x, x0, xaxis, xcalendar, xsrc, y, y0, yaxis, ycalendar, ysrc, **kwargs)
  91155         self["metasrc"] = metasrc if metasrc is not None else _v
  91156         _v = arg.pop("name", None)
> 91157         self["name"] = name if name is not None else _v
  91158         _v = arg.pop("offset", None)
  91159         self["offset"] = offset if offset is not None else _v

~\AppData\Roaming\Python\Python37\site-packages\plotly\basedatatypes.py in __setitem__(self, prop, value)
   3488             # ### Handle simple property ###
   3489             else:
-> 3490                 self._set_prop(prop, value)
   3491 
   3492         # Handle non-scalar case

~\AppData\Roaming\Python\Python37\site-packages\plotly\basedatatypes.py in _set_prop(self, prop, val)
   3775                 return
   3776             else:
-> 3777                 raise err
   3778 
   3779         # val is None

~\AppData\Roaming\Python\Python37\site-packages\plotly\basedatatypes.py in _set_prop(self, prop, val)
   3770         validator = self._validators.get(prop)
   3771         try:
-> 3772             val = validator.validate_coerce(val)
   3773         except ValueError as err:
   3774             if self._skip_invalid:

~\AppData\Roaming\Python\Python37\site-packages\_plotly_utils\basevalidators.py in validate_coerce(self, v)
   1078                     v = str(v)
   1079                 else:
-> 1080                     self.raise_invalid_val(v)
   1081 
   1082             if self.no_blank and len(v) == 0:

~\AppData\Roaming\Python\Python37\site-packages\_plotly_utils\basevalidators.py in raise_invalid_val(self, v, inds)
    281                 typ=type_str(v),
    282                 v=repr(v),
--> 283                 valid_clr_desc=self.description(),
    284             )
    285         )

ValueError: 
    Invalid value of type 'numpy.int64' received for the 'name' property of bar
        Received value: 2000

    The 'name' property is a string and must be specified as:
      - A string
      - A number that will be converted to a string

Creating stacked barcharts

In [232]:
#lets us see the charts in an iPython Notebook
pyo.offline.init_notebook_mode() # run at the start of every ipython 
In [233]:
meteorites = pd.read_csv("http://richard-muir.com/data/public/csv/MeteoritesByContinent.csv", index_col = 0)
meteorites.head()
Out[233]:
continent year count
0 Africa 2000 239
1 Africa 2001 87
2 Africa 2002 109
3 Africa 2003 30
4 Africa 2004 17
In [234]:
continents = list(meteorites['continent'].unique())
continents
Out[234]:
['Africa',
 'Antarctica',
 'Asia',
 'Australia',
 'Europe',
 'North America',
 'South America']
In [235]:
traces = []
for c in continents:
    traces.append({'type' : 'bar',
                  'name' : c,
                  'x' : meteorites.loc[meteorites['continent'] == c, 'year'],
                  'y' : meteorites.loc[meteorites['continent'] == c, 'count'],
                  'opacity' : 0.7})
    
layout = {'title' : "Meteorites found by continent, 2000 - 2012",
         'xaxis' : {'title' : 'Year'},
         'yaxis' : {'title' : 'Number of meteorites'},
         'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
                          'font' : {'color' : 'grey',
                                   'size' : 10},
                          'xref' : 'paper',
                          'yref' : 'paper',
                          'x' : 0,
                          'y' : -0.2,
                          'showarrow' : False}]}
fig = {'data' : traces, 'layout' : layout}
pyo.iplot(fig)
In [ ]:
# Changing the 'barmode'

The 'barmode' option is contained within the layout and can take one of two options: 'stack' - makes a stacked bar chart 'group' - the default, makes a grouped bar chart (as above) Let's change the 'barmode' to 'stacked' for this chart:

In [236]:
fig['layout'].update({'barmode' : 'stack'})
pyo.iplot(fig)
In [237]:
continents = ['Antarctica',
              'Asia',
              'Africa',
              'South America',
              'North America',
              'Australia',
              'Europe',]
In [238]:
traces = []
for c in continents:
    traces.append({'type' : 'bar',
                  'name' : c,
                  'x' : meteorites.loc[meteorites['continent'] == c, 'year'],
                  'y' : meteorites.loc[meteorites['continent'] == c, 'count'],
                  'opacity' : 0.7})
    
layout = {'title' : "Meteorites found by continent, 2000 - 2012",
          'barmode' : 'stack',
         'xaxis' : {'title' : 'Year'},
         'yaxis' : {'title' : 'Number of meteorites'},
         'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
                          'font' : {'color' : 'grey',
                                   'size' : 10},
                          'xref' : 'paper',
                          'yref' : 'paper',
                          'x' : 0,
                          'y' : -0.2,
                          'showarrow' : False}]}
fig = {'data' : traces, 'layout' : layout}
pyo.iplot(fig)

Stacked proportional bar charts

In [239]:
meteorites = pd.read_csv("http://richard-muir.com/data/public/csv/MeteoritesByContinent.csv", index_col = 0)
meteorites.head()
Out[239]:
continent year count
0 Africa 2000 239
1 Africa 2001 87
2 Africa 2002 109
3 Africa 2003 30
4 Africa 2004 17
In [240]:
# Firstly we need a list of the unique continents, which we'll use to calculate the proportions:
continents = list(meteorites['continent'].unique())
continents
Out[240]:
['Africa',
 'Antarctica',
 'Asia',
 'Australia',
 'Europe',
 'North America',
 'South America']
In [241]:
meteorites = meteorites.pivot(index='year',columns = 'continent', values='count')
meteorites.head()
Out[241]:
continent Africa Antarctica Asia Australia Europe North America South America
year
2000 239.0 806.0 389.0 NaN 15.0 46.0 4.0
2001 87.0 499.0 636.0 NaN 1.0 50.0 5.0
2002 109.0 586.0 281.0 2.0 10.0 37.0 5.0
2003 30.0 1422.0 207.0 1.0 7.0 32.0 14.0
2004 17.0 30.0 155.0 1.0 5.0 53.0 3.0
In [242]:
meteorites.fillna(value = 0, inplace = True)
meteorites.head()
Out[242]:
continent Africa Antarctica Asia Australia Europe North America South America
year
2000 239.0 806.0 389.0 0.0 15.0 46.0 4.0
2001 87.0 499.0 636.0 0.0 1.0 50.0 5.0
2002 109.0 586.0 281.0 2.0 10.0 37.0 5.0
2003 30.0 1422.0 207.0 1.0 7.0 32.0 14.0
2004 17.0 30.0 155.0 1.0 5.0 53.0 3.0
In [243]:
# using df.sum()
meteorites['total'] = meteorites.sum(axis = 1)
meteorites.head()
Out[243]:
continent Africa Antarctica Asia Australia Europe North America South America total
year
2000 239.0 806.0 389.0 0.0 15.0 46.0 4.0 1499.0
2001 87.0 499.0 636.0 0.0 1.0 50.0 5.0 1278.0
2002 109.0 586.0 281.0 2.0 10.0 37.0 5.0 1030.0
2003 30.0 1422.0 207.0 1.0 7.0 32.0 14.0 1713.0
2004 17.0 30.0 155.0 1.0 5.0 53.0 3.0 264.0
In [244]:
#  loop through our list of continents, calculating the proportion of meteorites each year that fell in each continent:
for c in continents:
    meteorites["{}_pc".format(c)] = meteorites[c] / meteorites['total']
    
meteorites.head()
Out[244]:
continent Africa Antarctica Asia Australia Europe North America South America total Africa_pc Antarctica_pc Asia_pc Australia_pc Europe_pc North America_pc South America_pc
year
2000 239.0 806.0 389.0 0.0 15.0 46.0 4.0 1499.0 0.159440 0.537692 0.259506 0.000000 0.010007 0.030687 0.002668
2001 87.0 499.0 636.0 0.0 1.0 50.0 5.0 1278.0 0.068075 0.390454 0.497653 0.000000 0.000782 0.039124 0.003912
2002 109.0 586.0 281.0 2.0 10.0 37.0 5.0 1030.0 0.105825 0.568932 0.272816 0.001942 0.009709 0.035922 0.004854
2003 30.0 1422.0 207.0 1.0 7.0 32.0 14.0 1713.0 0.017513 0.830123 0.120841 0.000584 0.004086 0.018681 0.008173
2004 17.0 30.0 155.0 1.0 5.0 53.0 3.0 264.0 0.064394 0.113636 0.587121 0.003788 0.018939 0.200758 0.011364
In [245]:
pcContinents = [c + "_pc" for c in continents]

sortKeys = dict(meteorites[pcContinents].sum())

pcContinents = sorted(pcContinents, key=lambda k: sortKeys[k], reverse=True)
pcContinents
Out[245]:
['Asia_pc',
 'Antarctica_pc',
 'North America_pc',
 'South America_pc',
 'Africa_pc',
 'Europe_pc',
 'Australia_pc']
In [246]:
traces = []

for c in pcContinents:
    traces.append({'type' : 'bar',
                  'name' : c[:-3],
                  'x' : meteorites.index,
                  'y' : meteorites[c],
                  'opacity' : 0.7})

pyo.iplot(traces)
In [247]:
# Stacked chart

layout = {'title' : "Proportion of meteorites found by continent, 2000 - 2012",
          'barmode' : 'stack',
         'xaxis' : {'title' : 'Year'},
         'yaxis' : {'title' : 'Proportion of meteorites',
                   'tickformat' : '%',
                   'hoverformat' : '%',},
         'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
                          'font' : {'color' : 'grey',
                                   'size' : 10},
                          'xref' : 'paper',
                          'yref' : 'paper',
                          'x' : 0,
                          'y' : -0.2,
                          'showarrow' : False}]}
fig = {'data' : traces, 'layout' : layout}
pyo.iplot(fig)

Two types of trace on the same chart

In [248]:
sizes = pd.read_csv("http://richard-muir.com/data/public/csv/MeteoriteLandingsbyWeightPerYear.csv", index_col = 0)
sizes.head()
Out[248]:
101 - 500g 501 - 1000g less than 101g more than 1000g
year
2000 522 137 888 232
2001 503 144 818 185
2002 497 123 1249 203
2003 555 134 2425 208
2004 394 119 1289 136
In [249]:
sizeStrings = sizes.columns.tolist()
sizeStrings
Out[249]:
['101 - 500g', '501 - 1000g', 'less than 101g', 'more than 1000g']
In [250]:
# Calculating the total and percentages:
sizes['total'] = sizes.sum(axis = 1)
for s in sizeStrings:
    sizes["{}_pc".format(s)] = sizes[s] / sizes['total']
    
sizes.head()
Out[250]:
101 - 500g 501 - 1000g less than 101g more than 1000g total 101 - 500g_pc 501 - 1000g_pc less than 101g_pc more than 1000g_pc
year
2000 522 137 888 232 1779 0.293423 0.077010 0.499157 0.130410
2001 503 144 818 185 1650 0.304848 0.087273 0.495758 0.112121
2002 497 123 1249 203 2072 0.239865 0.059363 0.602799 0.097973
2003 555 134 2425 208 3322 0.167068 0.040337 0.729982 0.062613
2004 394 119 1289 136 1938 0.203302 0.061404 0.665119 0.070175
In [261]:
layout = {'title' : "Proportion of meteorites found by continent, 2000 - 2012",
          'barmode' : 'stack',
          'xaxis' : {'title' : 'Year'},
          'yaxis' : {'title' : 'Proportion of meteorites',
                   'tickformat' : '%',
                   'hoverformat' : '%',},
         'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
                          'font' : {'color' : 'grey',
                                   'size' : 10},
                          'xref' : 'paper',
                          'yref' : 'paper',
                          'x' : 0,
                          'y' : -0.2,
                          'showarrow' : False}]}
trace2 = {'type' : 'scatter',
                       'mode' : 'lines+markers',
                       'x' : sizes.index,
                       'y' : sizes['less than 101g_pc'],
                       'marker' : {'color' : '#333'},
                       'name' : 'Meteorite < 101g'}
    

fig = {'data' : traces, 'layout' : layout}

# Adding 2nd trace to data
fig['data'].append({   'type' : 'scatter',
                       'mode' : 'lines+markers',
                       'x' : sizes.index,
                       'y' : sizes['less than 101g_pc'],
                       'marker' : {'color' : '#333'},
                       'name' : 'Meteorite < 101g'})
pyo.iplot(fig)

Dashboard

Subplot

In [262]:
fig = make_subplots(rows = 3, cols = 3,
                    specs = [[{'rowspan' : 2, 'colspan' : 2}, None, {'rowspan' : 2}],
                            [None, None, None],
                            [{'colspan' : 3}, None, None]],
                    subplot_titles = ["Types of meteorite by weight", 
                                     "Number of meteorites by continent",
                                     "Weight categories of meteorite",])
fig.append_trace({'type' : 'scatter'}, row = 1, col = 1)
pyo.iplot(fig)
In [263]:
# Adding first chart
stacked = py.get_figure("rmuir", 241)
for d in stacked['data']:
    xVals = d['y']
    yVals = d['x']
    d.update({'orientation' : 'h',
             'x' : xVals,
             'y' : yVals})
    fig.append_trace(d, row = 1, col = 3)
    
pyo.iplot(fig)
In [264]:
# Styling the bove chart
fig['layout']['xaxis2'].update({'tickformat' : '%',
                               'hoverformat' : '%'})

fig['layout'].update({'barmode' : 'stack',
                      'height' : 1000})

pyo.iplot(fig)
In [265]:
# Full code

fig = make_subplots(rows = 3, cols = 3,
                   specs = [[{'rowspan' : 2, 'colspan' : 2}, None, {'rowspan' : 2}],
                            [None, None, None],
                            [{'colspan' : 3}, None, None]],
                   subplot_titles = ["Types of meteorite by weight", 
                                     "Number of meteorites by continent",
                                     "Weight categories of meteorite",
                                    ])

stacked = py.get_figure("rmuir", 241)
for d in stacked['data']:
    xVals = d['y']
    yVals = d['x']
    d.update({'orientation' : 'h',
             'x' : xVals,
             'y' : yVals})
    fig.append_trace(d, row = 1, col = 3)
    
fig['layout']['xaxis2'].update({'tickformat' : '%',
                               'hoverformat' : '%'})

fig['layout'].update({'barmode' : 'stack',
                      'height' : 1000})

pyo.iplot(fig)
In [266]:
# How many meteorites were found each year in each weight category?

sizes = pd.read_csv("http://richard-muir.com/data/public/csv/MeteoriteLandingsbyWeightPerYear.csv", index_col = 0)

sizeStrings = sizes.columns.tolist()

sizes['total'] = sizes.sum(axis = 1)
for s in sizeStrings:
    sizes["{}_pc".format(s)] = sizes[s] / sizes['total']
    
sizes.head()
Out[266]:
101 - 500g 501 - 1000g less than 101g more than 1000g total 101 - 500g_pc 501 - 1000g_pc less than 101g_pc more than 1000g_pc
year
2000 522 137 888 232 1779 0.293423 0.077010 0.499157 0.130410
2001 503 144 818 185 1650 0.304848 0.087273 0.495758 0.112121
2002 497 123 1249 203 2072 0.239865 0.059363 0.602799 0.097973
2003 555 134 2425 208 3322 0.167068 0.040337 0.729982 0.062613
2004 394 119 1289 136 1938 0.203302 0.061404 0.665119 0.070175
In [267]:
for s in sizeStrings:
    fig.append_trace({'type' : 'scatter',
                     'mode' : 'markers+lines',
                     'x' : sizes.index,
                     'y' : sizes["{}_pc".format(s)],
                     'name' : s},
                    row = 3, col = 1)
    
fig['layout']['yaxis3'].update({'tickformat' : '%',
                               'hoverformat' : '%'})

pyo.iplot(fig)
In [269]:
typeWeight = pd.read_csv("http://richard-muir.com/data/public/csv/MeteoriteLandingsbyWeightAndType.csv", index_col = 0)
typeWeight.head()
Out[269]:
wideClass mass (g)
18 LL 700.0
22 Eucrite 252.0
30 Ureilite 3950.0
49 L 9500.0
82 H 29560.0
In [270]:
fig.append_trace({'type' : 'scatter',
             'mode' : "markers",
             'x' : typeWeight['wideClass'],
             'y' : typeWeight['mass (g)'],
                'showlegend' : False},
                row = 1, col = 1)

fig['layout']['yaxis1'].update({'title' : 'Weight (g)'})
pyo.iplot(fig)

Combined all

In [271]:
fig = make_subplots(rows = 3, cols = 3,
                   specs = [[{'rowspan' : 2, 'colspan' : 2}, None, {'rowspan' : 2}],
                            [None, None, None],
                            [{'colspan' : 3}, None, None]],
                   subplot_titles = ["Types of meteorite by weight", 
                                     "Number of meteorites by continent",
                                     "Weight categories of meteorite",])



stackedBarColours = ["rgb(95,134,183)", "rgb(177,200,235)", "rgb(1,54,136)", 
                     "rgb(237,180,236)", "rgb(104,12,113)", "rgb(241,82,182)", 
                     "rgb(101,230,249)",]    

stacked = py.get_figure("rmuir", 241)
for i, d in enumerate(stacked['data']):
    xVals = d['y']
    yVals = d['x']
    d.update({'orientation' : 'h',
             'x' : xVals,
             'y' : yVals,
             'marker' : {'color' : stackedBarColours[i]},
             'legendgroup' : 'continents'})
    fig.append_trace(d, row = 1, col = 3)
    
    
    
fig['layout']['xaxis2'].update({'tickformat' : '%',
                               'hoverformat' : '%'})
fig['layout'].update({'barmode' : 'stack',
                      'height' : 1000})

pyo.iplot(fig)
In [272]:
sizes = pd.read_csv("http://richard-muir.com/data/public/csv/MeteoriteLandingsbyWeightPerYear.csv", index_col = 0)

sizeStrings = sizes.columns.tolist()

sizes['total'] = sizes.sum(axis = 1)
for s in sizeStrings:
    sizes["{}_pc".format(s)] = sizes[s] / sizes['total']
    
sizeColours = ["rgb(194,87,211)", "rgb(77,87,168)", "rgb(223,184,245)", "rgb(88,38,166)"]

    
for i, s in enumerate(sizeStrings):
    fig.append_trace({'type' : 'scatter',
                     'mode' : 'markers+lines',
                     'x' : sizes.index,
                     'y' : sizes["{}_pc".format(s)],
                     'name' : s,
                     'marker' : {'color' : sizeColours[i]},
                     'legendgroup' : 'weightsbyyear'},
                    row = 3, col = 1)
    
fig['layout']['yaxis3'].update({'tickformat' : '%',
                               'hoverformat' : '%'})

pyo.iplot(fig)
In [273]:
fig.append_trace({'type' : 'scatter',
             'mode' : "markers",
             'x' : typeWeight['wideClass'],
             'y' : typeWeight['mass (g)'],
                'showlegend' : False,
                  
                  #NEW CODE GOES HERE:
                 'hoverinfo' : 'x+y',
                 'marker' : {'color' : "rgba(107,20,214, 0.5)"}},
                row = 1, col = 1)

fig['layout']['yaxis1'].update({'title' : 'Weight (g)'})
pyo.iplot(fig)
In [274]:
fig['layout'].update({'legend' : {'tracegroupgap' : 600}})

fig['layout']['annotations'].append({'font': {'color': 'grey', 'size': 10},
                      'showarrow': False,
                      'text': '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
                      'x': 0.1,
                      'xref': 'paper',
                      'y': -0.1,
                      'yref': 'paper'})
pyo.iplot(fig)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-274-a7e9d1809d4a> in <module>
      1 fig['layout'].update({'legend' : {'tracegroupgap' : 600}})
      2 
----> 3 fig['layout']['annotations'].append({'font': {'color': 'grey', 'size': 10},
      4                       'showarrow': False,
      5                       'text': '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',

AttributeError: 'tuple' object has no attribute 'append'
In [ ]: